From a4031a27293a248ff5e99bde4a847de6dfd69c9a Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 6 Feb 2024 17:38:49 +0800 Subject: [PATCH 01/32] Implement SQL logging --- .../pom.xml | 6 + .../components/executor/Executor.java | 8 +- .../util/PlaceholderValueAbstract.java | 34 ++++++ .../components/util/SqlLogging.java | 22 ++++ .../persistence/components/util/SqlUtils.java | 70 +++++++++++ .../relational/ansi/AnsiSqlSink.java | 4 +- .../relational/bigquery/BigQuerySink.java | 7 +- .../bigquery/executor/BigQueryExecutor.java | 45 ++++--- .../components/relational/RelationalSink.java | 3 +- .../api/RelationalIngestorAbstract.java | 114 +++++++++++------- .../executor/RelationalExecutor.java | 37 +++--- .../components/relational/h2/H2Sink.java | 7 +- .../ingestmode/bulkload/BulkLoadTest.java | 19 ++- .../relational/snowflake/SnowflakeSink.java | 7 +- 14 files changed, 287 insertions(+), 96 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/PlaceholderValueAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/SqlLogging.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/SqlUtils.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/pom.xml index 9d8026d9122..1206fd75e29 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/pom.xml @@ -48,6 +48,12 @@ jackson-databind + + + org.slf4j + slf4j-api + + commons-codec diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java index 4219e6227a9..543a2076f4f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java @@ -17,6 +17,8 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlan; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import 
org.finos.legend.engine.persistence.components.util.SqlLogging; import java.util.List; import java.util.Map; @@ -25,11 +27,11 @@ public interface Executor placeholderKeyValues); + void executePhysicalPlan(P physicalPlan, Map placeholderKeyValues); List executePhysicalPlanAndGetResults(P physicalPlan); - List executePhysicalPlanAndGetResults(P physicalPlan, Map placeholderKeyValues); + List executePhysicalPlanAndGetResults(P physicalPlan, Map placeholderKeyValues); boolean datasetExists(Dataset dataset); @@ -37,6 +39,8 @@ public interface Executor placeholderKeyValues, String sql) + { + String enrichedSql = sql; + for (Map.Entry entry : placeholderKeyValues.entrySet()) + { + enrichedSql = enrichedSql.replaceAll(Pattern.quote(entry.getKey()), entry.getValue().value()); + } + return enrichedSql; + } + + private static String getEnrichedSqlWithMasking(Map placeholderKeyValues, String sql) + { + String enrichedSql = sql; + for (Map.Entry entry : placeholderKeyValues.entrySet()) + { + if (!entry.getValue().isSensitive()) + { + enrichedSql = enrichedSql.replaceAll(Pattern.quote(entry.getKey()), entry.getValue().value()); + } + } + return enrichedSql; + } + + public static void logSql(Logger logger, SqlLogging sqlLogging, String sqlBeforeReplacingPlaceholders, String sqlAfterReplacingPlaceholders, Map placeholderKeyValues) + { + switch (sqlLogging) + { + case MASKED: + String maskedSql = getEnrichedSqlWithMasking(placeholderKeyValues, sqlBeforeReplacingPlaceholders); + logger.info(maskedSql); + return; + case UNMASKED: + logger.info(sqlAfterReplacingPlaceholders); + return; + case DISABLED: + } + } + + public static void logSql(Logger logger, SqlLogging sqlLogging, String sql) + { + if (!sqlLogging.equals(SqlLogging.DISABLED)) + { + logger.info(sql); + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 3ff13255881..972f6179de7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -154,6 +154,7 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.util.Capability; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; import java.util.Collections; import java.util.HashMap; @@ -323,7 +324,8 @@ private static Map, LogicalPlanVisitor> rightBiasedUnion(Map executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) + @Override + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { throw new UnsupportedOperationException("Bulk Load not supported!"); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index 1ee5e46a5d2..db0a95ebb7b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -77,6 +77,7 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.util.Capability; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; import java.util.Arrays; import java.util.Collections; @@ -262,7 +263,7 @@ public Field createNewField(Field evolveTo, Field evolveFrom, Optional } @Override - public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { BigQueryExecutor bigQueryExecutor = (BigQueryExecutor) executor; Map stats = bigQueryExecutor.executeLoadPhysicalPlanAndGetStats(ingestSqlPlan, placeHolderKeyValues); @@ -270,8 +271,8 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor { private final BigQuerySink bigQuerySink; private final BigQueryHelper bigQueryHelper; + private SqlLogging sqlLogging = SqlLogging.DISABLED; + + private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryExecutor.class); public BigQueryExecutor(BigQuerySink bigQuerySink, BigQueryHelper bigQueryHelper) { @@ -48,7 +55,7 @@ public void executePhysicalPlan(SqlPlan physicalPlan) } @Override - public void executePhysicalPlan(SqlPlan physicalPlan, Map placeholderKeyValues) + public void executePhysicalPlan(SqlPlan physicalPlan, Map placeholderKeyValues) { boolean containsDDLStatements = physicalPlan.ops().stream().anyMatch(DDLStatement.class::isInstance); List sqlList = physicalPlan.getSqlList(); @@ -57,7 +64,8 @@ public void executePhysicalPlan(SqlPlan physicalPlan, Map placeh { for (String sql : sqlList) { - String enrichedSql = getEnrichedSql(placeholderKeyValues, sql); + String enrichedSql = SqlUtils.getEnrichedSql(placeholderKeyValues, sql); + SqlUtils.logSql(LOGGER, sqlLogging, sql, enrichedSql, placeholderKeyValues); bigQueryHelper.executeQuery(enrichedSql); } } @@ -65,15 +73,18 @@ public void executePhysicalPlan(SqlPlan physicalPlan, Map placeh { for (String sql : sqlList) { - String enrichedSql = getEnrichedSql(placeholderKeyValues, sql); + String enrichedSql = SqlUtils.getEnrichedSql(placeholderKeyValues, sql); + SqlUtils.logSql(LOGGER, sqlLogging, sql, enrichedSql, placeholderKeyValues); bigQueryHelper.executeStatement(enrichedSql); } } } - public Map executeLoadPhysicalPlanAndGetStats(SqlPlan 
physicalPlan, Map placeholderKeyValues) + public Map executeLoadPhysicalPlanAndGetStats(SqlPlan physicalPlan, Map placeholderKeyValues) { - return bigQueryHelper.executeLoadStatement(getEnrichedSql(placeholderKeyValues, physicalPlan.getSqlList().get(0))); + String enrichedSql = SqlUtils.getEnrichedSql(placeholderKeyValues, physicalPlan.getSqlList().get(0)); + SqlUtils.logSql(LOGGER, sqlLogging, physicalPlan.getSqlList().get(0), enrichedSql, placeholderKeyValues); + return bigQueryHelper.executeLoadStatement(enrichedSql); } @Override @@ -83,12 +94,13 @@ public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan) } @Override - public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map placeholderKeyValues) + public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map placeholderKeyValues) { List resultSetList = new ArrayList<>(); for (String sql : physicalPlan.getSqlList()) { - String enrichedSql = getEnrichedSql(placeholderKeyValues, sql); + String enrichedSql = SqlUtils.getEnrichedSql(placeholderKeyValues, sql); + SqlUtils.logSql(LOGGER, sqlLogging, sql, enrichedSql, placeholderKeyValues); List> queryResult = bigQueryHelper.executeQuery(enrichedSql); if (!queryResult.isEmpty()) { @@ -116,6 +128,12 @@ public Dataset constructDatasetFromDatabase(Dataset dataset) return bigQuerySink.constructDatasetFromDatabaseFn().execute(this, bigQueryHelper, dataset); } + @Override + public void setSqlLogging(SqlLogging sqlLogging) + { + this.sqlLogging = sqlLogging; + } + @Override public void begin() { @@ -145,15 +163,4 @@ public RelationalExecutionHelper getRelationalExecutionHelper() { return this.bigQueryHelper; } - - private String getEnrichedSql(Map placeholderKeyValues, String sql) - { - String enrichedSql = sql; - for (Map.Entry entry : placeholderKeyValues.entrySet()) - { - enrichedSql = enrichedSql.replaceAll(Pattern.quote(entry.getKey()), entry.getValue()); - } - return enrichedSql; - } - } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index d58f1472853..723cb7e3a7d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -31,6 +31,7 @@ import org.finos.legend.engine.persistence.components.sink.Sink; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.util.Capability; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; import java.util.Collections; import java.util.Map; @@ -189,5 +190,5 @@ public interface ConstructDatasetFromDatabase Dataset execute(Executor executor, RelationalExecutionHelper sink, Dataset dataset); } - public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map 
placeHolderKeyValues); + public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index ff8f163c922..fe4d5b0ee10 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.api; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.*; import org.finos.legend.engine.persistence.components.executor.DigestInfo; @@ -27,7 +28,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.planner.Planner; -import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.planner.Planners; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -41,7 +41,9 @@ import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataUtils; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; +import org.finos.legend.engine.persistence.components.util.SqlLogging; import org.immutables.value.Value.Default; import org.immutables.value.Value.Derived; import org.immutables.value.Value.Immutable; @@ -76,6 +78,9 @@ public abstract class RelationalIngestorAbstract public static final String BATCH_ID_PATTERN = "{NEXT_BATCH_ID_PATTERN}"; public static final String BATCH_START_TS_PATTERN = "{BATCH_START_TIMESTAMP_PLACEHOLDER}"; private static final String BATCH_END_TS_PATTERN = "{BATCH_END_TIMESTAMP_PLACEHOLDER}"; + private static final String ADDITIONAL_METADATA_KEY_PATTERN = "{ADDITIONAL_METADATA_KEY_PLACEHOLDER}"; + private static final String ADDITIONAL_METADATA_VALUE_PATTERN = "{ADDITIONAL_METADATA_VALUE_PLACEHOLDER}"; + private static final String ADDITIONAL_METADATA_PLACEHOLDER_KEY_PATTERN = "{\"" + ADDITIONAL_METADATA_KEY_PATTERN + "\":\"" + ADDITIONAL_METADATA_VALUE_PATTERN + "\"}"; private static final Logger LOGGER = LoggerFactory.getLogger(RelationalIngestor.class); @@ -145,6 +150,12 
@@ public Set schemaEvolutionCapabilitySet() public abstract Optional bulkLoadEventIdValue(); + @Default + public SqlLogging sqlLogging() + { + return SqlLogging.DISABLED; + } + @Default public String batchSuccessStatusValue() { @@ -157,21 +168,6 @@ public String batchSuccessStatusValue() public abstract RelationalSink relationalSink(); - @Derived - protected PlannerOptions plannerOptions() - { - return PlannerOptions.builder() - .cleanupStagingData(cleanupStagingData()) - .collectStatistics(collectStatistics()) - .enableSchemaEvolution(enableSchemaEvolution()) - .createStagingDataset(createStagingDataset()) - .enableConcurrentSafety(enableConcurrentSafety()) - .putAllAdditionalMetadata(additionalMetadata()) - .bulkLoadEventIdValue(bulkLoadEventIdValue()) - .batchSuccessStatusValue(batchSuccessStatusValue()) - .build(); - } - @Derived protected TransformOptions transformOptions() { @@ -216,6 +212,7 @@ public Executor initExecutor(RelationalConnection connection) { LOGGER.info("Invoked initExecutor method, will initialize the executor"); this.executor = relationalSink().getRelationalExecutor(connection); + this.executor.setSqlLogging(sqlLogging()); return executor; } @@ -226,6 +223,7 @@ public void initExecutor(Executor executor) { LOGGER.info("Invoked initExecutor method, will initialize the executor"); this.executor = executor; + this.executor.setSqlLogging(sqlLogging()); } /* @@ -444,8 +442,8 @@ private void initializeLock() if (enableConcurrentSafety()) { LOGGER.info("Concurrent safety is enabled, Initializing lock"); - Map placeHolderKeyValues = new HashMap<>(); - placeHolderKeyValues.put(BATCH_START_TS_PATTERN, LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER)); + Map placeHolderKeyValues = new HashMap<>(); + placeHolderKeyValues.put(BATCH_START_TS_PATTERN, PlaceholderValue.of(LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER), false)); try { executor.executePhysicalPlan(generatorResult.initializeLockSqlPlan().orElseThrow(IllegalStateException::new), placeHolderKeyValues); @@ -463,8 +461,8 @@ private void acquireLock() if (enableConcurrentSafety()) { LOGGER.info("Concurrent safety is enabled, Acquiring lock"); - Map placeHolderKeyValues = new HashMap<>(); - placeHolderKeyValues.put(BATCH_START_TS_PATTERN, LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER)); + Map placeHolderKeyValues = new HashMap<>(); + placeHolderKeyValues.put(BATCH_START_TS_PATTERN, PlaceholderValue.of(LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER), false)); executor.executePhysicalPlan(generatorResult.acquireLockSqlPlan().orElseThrow(IllegalStateException::new), placeHolderKeyValues); } } @@ -588,7 +586,14 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) // 7. Enrich temp Datasets enrichedDatasets = enrichedIngestMode.accept(new TempDatasetsEnricher(enrichedDatasets)); - // 8. generate sql plans + // 8. Put additional metadata into placeholder + Map placeholderAdditionalMetadata = new HashMap<>(); + if (!additionalMetadata().isEmpty()) + { + placeholderAdditionalMetadata = Collections.singletonMap(ADDITIONAL_METADATA_KEY_PATTERN, ADDITIONAL_METADATA_VALUE_PATTERN); + } + + // 9. 
generate sql plans RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(enrichedIngestMode) .relationalSink(relationalSink()) @@ -597,17 +602,18 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) .createStagingDataset(createStagingDataset()) .enableSchemaEvolution(enableSchemaEvolution()) .addAllSchemaEvolutionCapabilitySet(schemaEvolutionCapabilitySet()) + .enableConcurrentSafety(enableConcurrentSafety()) .caseConversion(caseConversion()) .executionTimestampClock(executionTimestampClock()) .batchStartTimestampPattern(BATCH_START_TS_PATTERN) .batchEndTimestampPattern(BATCH_END_TS_PATTERN) .batchIdPattern(BATCH_ID_PATTERN) - .putAllAdditionalMetadata(additionalMetadata()) + .putAllAdditionalMetadata(placeholderAdditionalMetadata) .bulkLoadEventIdValue(bulkLoadEventIdValue()) .batchSuccessStatusValue(batchSuccessStatusValue()) .build(); - planner = Planners.get(enrichedDatasets, enrichedIngestMode, plannerOptions(), relationalSink().capabilities()); + planner = Planners.get(enrichedDatasets, enrichedIngestMode, generator.plannerOptions(), relationalSink().capabilities()); generatorResult = generator.generateOperations(enrichedDatasets, resourcesBuilder.build(), planner, enrichedIngestMode); datasetsInitialized = true; return enrichedDatasets; @@ -624,17 +630,17 @@ private List performIngestion(Datasets datasets, Transformer dataSplitRange = Optional.ofNullable(dataSplitsCount == 0 ? null : dataSplitRanges.get(dataSplitIndex)); // Extract the Placeholders values - Map placeHolderKeyValues = extractPlaceHolderKeyValues(datasets, executor, planner, transformer, ingestMode, dataSplitRange); + Map placeHolderKeyValues = extractPlaceHolderKeyValues(datasets, executor, planner, transformer, ingestMode, dataSplitRange); // Load main table, extract stats and update metadata table Map statisticsResultMap = loadData(executor, generatorResult, placeHolderKeyValues); IngestorResult result = IngestorResult.builder() .putAllStatisticByName(statisticsResultMap) .updatedDatasets(datasets) - .batchId(Optional.ofNullable(placeHolderKeyValues.containsKey(BATCH_ID_PATTERN) ? Integer.valueOf(placeHolderKeyValues.get(BATCH_ID_PATTERN)) : null)) + .batchId(Optional.ofNullable(placeHolderKeyValues.containsKey(BATCH_ID_PATTERN) ? 
Integer.valueOf(placeHolderKeyValues.get(BATCH_ID_PATTERN).value()) : null)) .dataSplitRange(dataSplitRange) .schemaEvolutionSql(schemaEvolutionResult.schemaEvolutionSql()) .status(IngestStatus.SUCCEEDED) - .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) + .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN).value()) .build(); results.add(result); dataSplitIndex++; @@ -645,7 +651,7 @@ private List performIngestion(Datasets datasets, Transformer loadData(Executor executor, GeneratorResult generatorResult, Map placeHolderKeyValues) + private Map loadData(Executor executor, GeneratorResult generatorResult, Map placeHolderKeyValues) { // Extract preIngest Statistics Map statisticsResultMap = new HashMap<>( @@ -657,7 +663,7 @@ private Map loadData(Executor performBulkLoad(Datasets datasets, Transformer results = new ArrayList<>(); - Map placeHolderKeyValues = extractPlaceHolderKeyValues(datasets, executor, planner, transformer, ingestMode, Optional.empty()); + Map placeHolderKeyValues = extractPlaceHolderKeyValues(datasets, executor, planner, transformer, ingestMode, Optional.empty()); // Execute ingest SqlPlan IngestorResult result = relationalSink().performBulkLoad(datasets, executor, generatorResult.ingestSqlPlan(), generatorResult.postIngestStatisticsSqlPlan(), placeHolderKeyValues); @@ -678,8 +684,8 @@ private List performBulkLoad(Datasets datasets, Transformer trans private Map executeStatisticsPhysicalPlan(Executor executor, Map statisticsSqlPlan, - Map placeHolderKeyValues) + Map placeHolderKeyValues) { Map results = new HashMap<>(); for (Map.Entry entry: statisticsSqlPlan.entrySet()) @@ -772,18 +778,22 @@ private Map executeDeduplicationAndVersi return results; } - private Map extractPlaceHolderKeyValues(Datasets datasets, Executor executor, + private Map extractPlaceHolderKeyValues(Datasets datasets, Executor executor, Planner planner, Transformer transformer, IngestMode ingestMode, Optional dataSplitRange) { - Map placeHolderKeyValues = new HashMap<>(); + Map placeHolderKeyValues = new HashMap<>(); + + // Handle batch ID Optional nextBatchId = ApiUtils.getNextBatchId(datasets, executor, transformer); - Optional>> optimizationFilters = ApiUtils.getOptimizationFilterBounds(datasets, executor, transformer, ingestMode); if (nextBatchId.isPresent()) { LOGGER.info(String.format("Obtained the next Batch id: %s", nextBatchId.get())); - placeHolderKeyValues.put(BATCH_ID_PATTERN, nextBatchId.get().toString()); + placeHolderKeyValues.put(BATCH_ID_PATTERN, PlaceholderValue.of(nextBatchId.get().toString(), false)); } + + // Handle optimization filters + Optional>> optimizationFilters = ApiUtils.getOptimizationFilterBounds(datasets, executor, transformer, ingestMode); if (optimizationFilters.isPresent()) { for (OptimizationFilter filter : optimizationFilters.get().keySet()) @@ -792,13 +802,13 @@ private Map extractPlaceHolderKeyValues(Datasets datasets, Execu Object upperBound = optimizationFilters.get().get(filter).getTwo(); if (lowerBound instanceof Date) { - placeHolderKeyValues.put(filter.lowerBoundPattern(), lowerBound.toString()); - placeHolderKeyValues.put(filter.upperBoundPattern(), upperBound.toString()); + placeHolderKeyValues.put(filter.lowerBoundPattern(), PlaceholderValue.of(lowerBound.toString(), true)); + placeHolderKeyValues.put(filter.upperBoundPattern(), PlaceholderValue.of(upperBound.toString(), true)); } else if (lowerBound instanceof Number) { - placeHolderKeyValues.put(SINGLE_QUOTE + filter.lowerBoundPattern() + SINGLE_QUOTE, 
lowerBound.toString()); - placeHolderKeyValues.put(SINGLE_QUOTE + filter.upperBoundPattern() + SINGLE_QUOTE, upperBound.toString()); + placeHolderKeyValues.put(SINGLE_QUOTE + filter.lowerBoundPattern() + SINGLE_QUOTE, PlaceholderValue.of(lowerBound.toString(), true)); + placeHolderKeyValues.put(SINGLE_QUOTE + filter.upperBoundPattern() + SINGLE_QUOTE, PlaceholderValue.of(upperBound.toString(), true)); } else { @@ -806,13 +816,29 @@ else if (lowerBound instanceof Number) } } } + + // Handle data splits if (planner.dataSplitExecutionSupported() && dataSplitRange.isPresent()) { - placeHolderKeyValues.put(SINGLE_QUOTE + LogicalPlanUtils.DATA_SPLIT_LOWER_BOUND_PLACEHOLDER + SINGLE_QUOTE, String.valueOf(dataSplitRange.get().lowerBound())); - placeHolderKeyValues.put(SINGLE_QUOTE + LogicalPlanUtils.DATA_SPLIT_UPPER_BOUND_PLACEHOLDER + SINGLE_QUOTE, String.valueOf(dataSplitRange.get().upperBound())); + placeHolderKeyValues.put(SINGLE_QUOTE + LogicalPlanUtils.DATA_SPLIT_LOWER_BOUND_PLACEHOLDER + SINGLE_QUOTE, PlaceholderValue.of(String.valueOf(dataSplitRange.get().lowerBound()), false)); + placeHolderKeyValues.put(SINGLE_QUOTE + LogicalPlanUtils.DATA_SPLIT_UPPER_BOUND_PLACEHOLDER + SINGLE_QUOTE, PlaceholderValue.of(String.valueOf(dataSplitRange.get().upperBound()), false)); + } + + // Handle additional metadata + try + { + ObjectMapper objectMapper = new ObjectMapper(); + String additionalMetadataString = objectMapper.writeValueAsString(additionalMetadata()); + placeHolderKeyValues.put(ADDITIONAL_METADATA_PLACEHOLDER_KEY_PATTERN, PlaceholderValue.of(additionalMetadataString, true)); + } + catch (JsonProcessingException e) + { + throw new IllegalStateException("Unable to parse additional metadata"); } - placeHolderKeyValues.put(BATCH_START_TS_PATTERN, LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER)); + + // Handle batch timestamp + placeHolderKeyValues.put(BATCH_START_TS_PATTERN, PlaceholderValue.of(LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER), false)); + return placeHolderKeyValues; } - } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java index 8a7e014048d..8e8e509abf3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java @@ -21,16 +21,23 @@ import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.SqlLogging; +import org.finos.legend.engine.persistence.components.util.SqlUtils; +import org.slf4j.Logger; 
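// A minimal, self-contained sketch of the MASKED placeholder substitution these hunks
// introduce, for illustration only. Placeholder is a simplified stand-in for the
// Immutables-generated PlaceholderValue, and the placeholder keys below are illustrative
// (the real additional-metadata key is the JSON-shaped ADDITIONAL_METADATA pattern).
// The sketch wraps values in Matcher.quoteReplacement to guard against '$' or '\' in a
// replacement string; the patch itself passes the raw value, which is safe for the
// timestamp and batch-id values it substitutes.
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public final class PlaceholderMaskingSketch
{
    // Stand-in for org.finos.legend.engine.persistence.components.util.PlaceholderValue
    static final class Placeholder
    {
        final String value;
        final boolean sensitive;

        Placeholder(String value, boolean sensitive)
        {
            this.value = value;
            this.sensitive = sensitive;
        }
    }

    // Mirrors SqlUtils.getEnrichedSqlWithMasking: substitute only non-sensitive
    // placeholders, so MASKED logging never prints sensitive values.
    static String mask(Map<String, Placeholder> placeholders, String sql)
    {
        String enriched = sql;
        for (Map.Entry<String, Placeholder> entry : placeholders.entrySet())
        {
            if (!entry.getValue().sensitive)
            {
                enriched = enriched.replaceAll(Pattern.quote(entry.getKey()), Matcher.quoteReplacement(entry.getValue().value));
            }
        }
        return enriched;
    }

    public static void main(String[] args)
    {
        Map<String, Placeholder> placeholders = new LinkedHashMap<>();
        placeholders.put("{BATCH_START_TIMESTAMP_PLACEHOLDER}", new Placeholder("2000-01-01 00:00:00", false));
        placeholders.put("{ADDITIONAL_METADATA_PLACEHOLDER}", new Placeholder("{\"external_uuid\":\"123\"}", true));
        // MASKED output: the timestamp is substituted, the sensitive placeholder is left visible as its pattern
        System.out.println(mask(placeholders, "INSERT INTO batch_metadata VALUES ('{BATCH_START_TIMESTAMP_PLACEHOLDER}', '{ADDITIONAL_METADATA_PLACEHOLDER}')"));
    }
}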
+import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.regex.Pattern; public class RelationalExecutor implements Executor { private final RelationalSink relationalSink; private final RelationalExecutionHelper relationalExecutionHelper; + private SqlLogging sqlLogging = SqlLogging.DISABLED; + + private static final Logger LOGGER = LoggerFactory.getLogger(RelationalExecutor.class); public RelationalExecutor(RelationalSink relationalSink, RelationalExecutionHelper relationalExecutionHelper) { @@ -42,16 +49,18 @@ public RelationalExecutor(RelationalSink relationalSink, RelationalExecutionHelp public void executePhysicalPlan(SqlPlan physicalPlan) { List sqlList = physicalPlan.getSqlList(); + sqlList.forEach(sql -> SqlUtils.logSql(LOGGER, sqlLogging, sql)); relationalExecutionHelper.executeStatements(sqlList); } @Override - public void executePhysicalPlan(SqlPlan physicalPlan, Map placeholderKeyValues) + public void executePhysicalPlan(SqlPlan physicalPlan, Map placeholderKeyValues) { List sqlList = physicalPlan.getSqlList(); for (String sql : sqlList) { - String enrichedSql = getEnrichedSql(placeholderKeyValues, sql); + String enrichedSql = SqlUtils.getEnrichedSql(placeholderKeyValues, sql); + SqlUtils.logSql(LOGGER, sqlLogging, sql, enrichedSql, placeholderKeyValues); relationalExecutionHelper.executeStatement(enrichedSql); } } @@ -62,6 +71,7 @@ public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan) List resultSetList = new ArrayList<>(); for (String sql : physicalPlan.getSqlList()) { + SqlUtils.logSql(LOGGER, sqlLogging, sql); List> queryResult = relationalExecutionHelper.executeQuery(sql); if (!queryResult.isEmpty()) { @@ -72,12 +82,13 @@ public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan) } @Override - public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map placeholderKeyValues) + public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map placeholderKeyValues) { List resultSetList = new ArrayList<>(); for (String sql : physicalPlan.getSqlList()) { - String enrichedSql = getEnrichedSql(placeholderKeyValues, sql); + String enrichedSql = SqlUtils.getEnrichedSql(placeholderKeyValues, sql); + SqlUtils.logSql(LOGGER, sqlLogging, sql, enrichedSql, placeholderKeyValues); List> queryResult = relationalExecutionHelper.executeQuery(enrichedSql); if (!queryResult.isEmpty()) { @@ -105,6 +116,12 @@ public Dataset constructDatasetFromDatabase(Dataset dataset) return relationalSink.constructDatasetFromDatabaseFn().execute(this, relationalExecutionHelper, dataset); } + @Override + public void setSqlLogging(SqlLogging sqlLogging) + { + this.sqlLogging = sqlLogging; + } + @Override public void begin() { @@ -134,14 +151,4 @@ public RelationalExecutionHelper getRelationalExecutionHelper() { return this.relationalExecutionHelper; } - - private String getEnrichedSql(Map placeholderKeyValues, String sql) - { - String enrichedSql = sql; - for (Map.Entry entry : placeholderKeyValues.entrySet()) - { - enrichedSql = enrichedSql.replaceAll(Pattern.quote(entry.getKey()), entry.getValue()); - } - return enrichedSql; - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 9b8ac1db944..d2a986eaffd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -66,6 +66,7 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.relational.executor.RelationalExecutor; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; import java.sql.Connection; import java.sql.DriverManager; @@ -203,7 +204,7 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } @Override - public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { executor.executePhysicalPlan(ingestSqlPlan, placeHolderKeyValues); @@ -227,10 +228,10 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor additionalMetadata = new HashMap<>(); + additionalMetadata.put("watermark", "my_watermark_value"); + additionalMetadata.put("external_uuid", "my_external_uuid"); + // Verify SQLs using generator RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) .bulkLoadEventIdValue(EVENT_ID_1) - .putAllAdditionalMetadata(ADDITIONAL_METADATA) + .putAllAdditionalMetadata(additionalMetadata) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -380,10 +384,10 @@ public void testBulkLoadWithDigestGeneratedWithFieldsToExcludeAuditEnabled() thr String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table6.csv"; - RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.of(EVENT_ID_1), ADDITIONAL_METADATA); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.of(EVENT_ID_1), additionalMetadata); executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); Map appendMetadata = h2Sink.executeQuery("select * from batch_metadata").get(0); - verifyBulkLoadMetadata(appendMetadata, filePath, 1, Optional.of(EVENT_ID_1), Optional.of(ADDITIONAL_METADATA)); + verifyBulkLoadMetadata(appendMetadata, filePath, 1, Optional.of(EVENT_ID_1), Optional.of(additionalMetadata)); } @Test @@ -415,12 +419,17 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + Map additionalMetadata = new HashMap<>(); + additionalMetadata.put("watermark", "my_watermark_value"); + additionalMetadata.put("external_uuid", "my_external_uuid"); + // Verify SQLs using generator RelationalGenerator generator = RelationalGenerator.builder() 
.ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) .bulkLoadEventIdValue(EVENT_ID_1) + .putAllAdditionalMetadata(additionalMetadata) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -457,10 +466,10 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table4.csv"; - RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER, Optional.of(EVENT_ID_1)); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER, Optional.of(EVENT_ID_1), additionalMetadata); executePlansAndVerifyForCaseConversion(ingestor, datasets, schema, expectedDataPath, expectedStats); Map appendMetadata = h2Sink.executeQuery("select * from BATCH_METADATA").get(0); - verifyBulkLoadMetadataForUpperCase(appendMetadata, filePath, 1, Optional.of(EVENT_ID_1), Optional.empty()); + verifyBulkLoadMetadataForUpperCase(appendMetadata, filePath, 1, Optional.of(EVENT_ID_1), Optional.of(additionalMetadata)); } @Test diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index ed8e4f05e67..040d23b084a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -71,6 +71,7 @@ import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.util.Capability; +import org.finos.legend.engine.persistence.components.util.PlaceholderValue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -233,7 +234,7 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } @Override - public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { List results = executor.executePhysicalPlanAndGetResults(ingestSqlPlan, placeHolderKeyValues); List> resultSets = results.get(0).getData(); @@ -283,8 +284,8 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor Date: Tue, 6 Feb 2024 17:44:09 +0800 Subject: [PATCH 02/32] Clean up --- .../components/relational/api/RelationalIngestorAbstract.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index fe4d5b0ee10..c59b163cf3d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -586,7 +586,7 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) // 7. Enrich temp Datasets enrichedDatasets = enrichedIngestMode.accept(new TempDatasetsEnricher(enrichedDatasets)); - // 8. Put additional metadata into placeholder + // 8. Use a placeholder for additional metadata Map placeholderAdditionalMetadata = new HashMap<>(); if (!additionalMetadata().isEmpty()) { From 6ab1e549984ef07dc65320eb4a897f1502a69567 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 12 Feb 2024 12:17:44 +0530 Subject: [PATCH 03/32] Address code review Comments and add test for additional metadata check --- .../persistence/components/util/SqlUtils.java | 7 +++++-- .../api/RelationalIngestorAbstract.java | 4 ++-- .../pom.xml | 12 +++++++++++ .../ingestmode/bulkload/BulkLoadTest.java | 21 ++++++++++++++----- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/SqlUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/SqlUtils.java index f93d2dd5933..10425ada738 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/SqlUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/SqlUtils.java @@ -52,11 +52,14 @@ public static void logSql(Logger logger, SqlLogging sqlLogging, String sqlBefore case MASKED: String maskedSql = getEnrichedSqlWithMasking(placeholderKeyValues, sqlBeforeReplacingPlaceholders); logger.info(maskedSql); - return; + break; case UNMASKED: logger.info(sqlAfterReplacingPlaceholders); - return; + break; case DISABLED: + break; + default: + throw new IllegalArgumentException("Unsupported sqlLogging: " + sqlLogging); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index c59b163cf3d..60a5310c55d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -80,7 +80,7 @@ public abstract class RelationalIngestorAbstract private static final String BATCH_END_TS_PATTERN = "{BATCH_END_TIMESTAMP_PLACEHOLDER}"; private static final String ADDITIONAL_METADATA_KEY_PATTERN = "{ADDITIONAL_METADATA_KEY_PLACEHOLDER}"; private static final String ADDITIONAL_METADATA_VALUE_PATTERN = "{ADDITIONAL_METADATA_VALUE_PLACEHOLDER}"; - private static final String ADDITIONAL_METADATA_PLACEHOLDER_KEY_PATTERN = "{\"" + ADDITIONAL_METADATA_KEY_PATTERN + "\":\"" + ADDITIONAL_METADATA_VALUE_PATTERN + "\"}"; + private static final String ADDITIONAL_METADATA_PLACEHOLDER_PATTERN = "{\"" + ADDITIONAL_METADATA_KEY_PATTERN + "\":\"" + ADDITIONAL_METADATA_VALUE_PATTERN + "\"}"; private static final Logger LOGGER = LoggerFactory.getLogger(RelationalIngestor.class); @@ -829,7 +829,7 @@ else if (lowerBound instanceof Number) { ObjectMapper objectMapper = new ObjectMapper(); String additionalMetadataString = objectMapper.writeValueAsString(additionalMetadata()); - placeHolderKeyValues.put(ADDITIONAL_METADATA_PLACEHOLDER_KEY_PATTERN, PlaceholderValue.of(additionalMetadataString, true)); + placeHolderKeyValues.put(ADDITIONAL_METADATA_PLACEHOLDER_PATTERN, PlaceholderValue.of(additionalMetadataString, true)); } catch (JsonProcessingException e) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml index abe70c656dd..ecf4b913d11 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml @@ -80,5 +80,17 @@ 3.10 test + + + com.fasterxml.jackson.core + jackson-core + test + + + com.fasterxml.jackson.core + jackson-databind + test + + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index e32e7c9a584..d2db5bcf01e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.ingestmode.bulkload; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.FileFormatType; @@ -764,25 +766,34 @@ RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions o .build(); } - private void verifyBulkLoadMetadata(Map appendMetadata, String fileName, int batchId, Optional eventId, Optional> additionalMetadata) + private void verifyBulkLoadMetadata(Map appendMetadata, String fileName, int batchId, Optional eventId, Optional> additionalMetadata) throws JsonProcessingException { Assertions.assertEquals(batchId, appendMetadata.get("table_batch_id")); Assertions.assertEquals("SUCCEEDED", appendMetadata.get("batch_status")); Assertions.assertEquals("main", appendMetadata.get("table_name")); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_start_ts_utc").toString()); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_end_ts_utc").toString()); - Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"file_paths\":[\"%s\"]", fileName))); + String batchSourceInfoStr = (String) appendMetadata.get("batch_source_info"); + HashMap batchSourceInfoMap = new ObjectMapper().readValue(batchSourceInfoStr, HashMap.class); + Assertions.assertEquals(batchSourceInfoMap.get("file_paths").toString(), String.format("[%s]", fileName)); + if (eventId.isPresent()) { - Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"event_id\":\"%s\"", eventId.get()))); + Assertions.assertEquals(batchSourceInfoMap.get("event_id"), eventId.get()); } else { - Assertions.assertFalse(appendMetadata.get("batch_source_info").toString().contains("\"event_id\"")); + Assertions.assertFalse(batchSourceInfoMap.containsKey("event_id")); } if (additionalMetadata.isPresent()) { - Assertions.assertNotNull(appendMetadata.get("additional_metadata")); + String additionalMetaStr = (String) appendMetadata.get("additional_metadata"); + Assertions.assertNotNull(additionalMetaStr); + HashMap additionalMetaMap = new ObjectMapper().readValue(additionalMetaStr, HashMap.class); + for (Map.Entry entry :additionalMetadata.get().entrySet()) + { + Assertions.assertEquals(additionalMetaMap.get(entry.getKey()), entry.getValue()); + } } else { From e3a19d5758b3f6d47af3ee6e139adea90d78020a Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Wed, 7 Feb 2024 17:21:57 +0530 Subject: [PATCH 04/32] Dry Run mode for Snowflake --- .../logicalplan/operations/CopyAbstract.java | 10 ++- .../components/planner/BulkLoadPlanner.java | 52 +++++++++++- .../components/planner/Planner.java | 10 +++ .../components/util/Capability.java | 3 +- .../components/executor/Executor.java | 2 + .../executor/RelationalExecutionHelper.java | 2 + .../relational/ansi/AnsiSqlSink.java | 5 ++ .../bigquery/executor/BigQueryExecutor.java | 7 ++ .../bigquery/executor/BigQueryHelper.java | 6 ++ .../components/relational/RelationalSink.java | 2 + .../relational/api/DryRunResultAbstract.java | 35 
++++++++ .../api/GeneratorResultAbstract.java | 14 ++++ .../api/RelationalGeneratorAbstract.java | 10 +++ .../api/RelationalIngestorAbstract.java | 24 ++++++ .../executor/RelationalExecutor.java | 15 ++++ .../relational/jdbc/JdbcHelper.java | 36 +++++++++ .../jdbc/JdbcTransactionManager.java | 53 +++++++++--- .../ingestmode/bulkload/BulkLoadTest.java | 5 +- .../relational/snowflake/SnowflakeSink.java | 33 ++++++++ .../snowflake/sql/visitor/CopyVisitor.java | 23 ++++-- .../schemaops/statements/CopyStatement.java | 80 ++++++++++++++----- .../components/ingestmode/BulkLoadTest.java | 47 +++++++++-- .../sqldom/schemaops/CopyStatementTest.java | 46 ++++++++++- 23 files changed, 470 insertions(+), 50 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java index 82e5876ed42..8f5a744bd67 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java @@ -34,15 +34,17 @@ ) public interface CopyAbstract extends Operation { - @Parameter(order = 0) Dataset targetDataset(); - @Parameter(order = 1) Dataset sourceDataset(); - @Parameter(order = 2) List fields(); - @Parameter(order = 3) StagedFilesDatasetProperties stagedFilesDatasetProperties(); + + @org.immutables.value.Value.Default + default boolean dryRun() + { + return false; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index ad882e365b8..ecdb551f577 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -23,6 +23,9 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.ExternalDataset; +import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetAdditionalProperties; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.TableType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; @@ -112,6 +115,24 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) } } + @Override + public LogicalPlan buildLogicalPlanForDryRun(Resources resources) + { + List operations = new ArrayList<>(); + if (capabilities.contains(Capability.DRY_RUN)) + { + Dataset validationDataset = getValidationDataset(); + Copy copy = Copy.builder() + .targetDataset(validationDataset) + .sourceDataset(stagedFilesDataset.datasetReference().withAlias("")) + .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()) + .dryRun(true) + .build(); + operations.add(copy); + } + return LogicalPlan.of(operations); + } + private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) { List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); @@ -131,7 +152,13 @@ private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) } Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build(); - return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert, stagedFilesDataset.stagedFilesDatasetProperties()))); + return LogicalPlan.of(Collections.singletonList( + Copy.builder() + .targetDataset(mainDataset()) + .sourceDataset(selectStage) + .addAllFields(fieldsToInsert) + .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()) + .build())); } private LogicalPlan buildLogicalPlanForCopyAndTransform(Resources resources) @@ -176,6 +203,17 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) return LogicalPlan.of(operations); } + @Override + public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources) + { + List operations = new ArrayList<>(); + if (capabilities.contains(Capability.DRY_RUN)) + { + operations.add(Create.of(true, getValidationDataset())); + } + return LogicalPlan.of(operations); + } + @Override public LogicalPlan buildLogicalPlanForPostActions(Resources resources) { @@ -248,4 +286,16 @@ protected void addPostRunStatsForRowsDeleted(Map pos { // Not supported at the moment } + + private Dataset getValidationDataset() + { + String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); + return DatasetDefinition.builder() + .schema(stagedFilesDataset.schema()) + .database(mainDataset().datasetReference().database()) + .group(mainDataset().datasetReference().group()) + .name(tableName + UNDERSCORE + "validation") + .datasetAdditionalProperties(DatasetAdditionalProperties.builder().tableType(TableType.TEMPORARY).build()) + .build(); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 65c3f9fe32e..8dd31a0524a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -256,6 +256,16 @@ protected PlannerOptions options() public abstract LogicalPlan buildLogicalPlanForIngest(Resources resources); + public LogicalPlan buildLogicalPlanForDryRun(Resources resources) + { + return LogicalPlan.of(Collections.emptyList()); + } + + public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources) + { + return LogicalPlan.of(Collections.emptyList()); + } + public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) { // Save staging filters into batch_source_info column diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java index f99f9f94a23..4ec80de6b9e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java @@ -22,5 +22,6 @@ public enum Capability EXPLICIT_DATA_TYPE_CONVERSION, DATA_TYPE_LENGTH_CHANGE, DATA_TYPE_SCALE_CHANGE, - TRANSFORM_WHILE_COPY; + TRANSFORM_WHILE_COPY, + DRY_RUN; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java index 543a2076f4f..3ce2231fdc7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java @@ -31,6 +31,8 @@ public interface Executor executePhysicalPlanAndGetResults(P physicalPlan); + List executePhysicalPlanAndGetResults(P physicalPlan, int rows); + List executePhysicalPlanAndGetResults(P physicalPlan, Map placeholderKeyValues); boolean datasetExists(Dataset dataset); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java
index c347996f81f..3a6ae90fe35 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java
@@ -42,6 +42,8 @@ public interface RelationalExecutionHelper
     void executeStatements(List<String> sqls);
 
+    List<Map<String, Object>> executeQuery(String sql, int rows);
+
     List<Map<String, Object>> executeQuery(String sql);
 
     void close();
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java
index 972f6179de7..48b1f93640a 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java
@@ -329,4 +329,9 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan ingestSqlPlan, Map<StatisticName, SqlPlan> statisticsSqlPlan, Map<String, PlaceholderValue> placeHolderKeyValues)
+
+    @Override
+    public void performDryRun(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan)
+    {
+        throw new UnsupportedOperationException("DryRun not supported!");
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java
index e6bfa481967..591b552b97f 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java
@@ -93,6 +93,13 @@ public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan)
         return executePhysicalPlanAndGetResults(physicalPlan, new HashMap<>());
     }
 
+    @Override
+    public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan, int rows)
+    {
+        // TODO: row-limited execution is not yet implemented for BigQuery
+        throw new UnsupportedOperationException("Not implemented for Big Query");
+    }
+
     @Override
     public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map<String, PlaceholderValue> placeholderKeyValues)
     {
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java
index 2a2b39d71b7..be90222d9db 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java
@@ -368,6 +368,12 @@ public void executeStatements(List<String> sqls)
         }
     }
 
+    @Override
+    public List<Map<String, Object>> executeQuery(String sql, int rows)
+    {
+        throw new RuntimeException("Not implemented for Big Query");
+    }
+
     public void executeStatementsInANewTransaction(List<String> sqls)
     {
         BigQueryTransactionManager txManager = null;
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java
index 723cb7e3a7d..69895ae78e6 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java
@@ -191,4 +191,6 @@ public interface ConstructDatasetFromDatabase
     }
 
     public abstract IngestorResult performBulkLoad(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan ingestSqlPlan, Map<StatisticName, SqlPlan> statisticsSqlPlan, Map<String, PlaceholderValue> placeHolderKeyValues);
+
+    public abstract void performDryRun(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan);
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java
new file mode 100644
index 00000000000..4a88ebb2f7a
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java
@@ -0,0 +1,35 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the
Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.api; + +import org.immutables.value.Value; + +import java.util.List; +import java.util.Map; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public abstract class DryRunResultAbstract +{ + public abstract IngestStatus status(); + + public abstract List> errorRecords(); +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 8ecf5cbaa16..9bdb5f00337 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -43,6 +43,8 @@ public abstract class GeneratorResultAbstract public abstract SqlPlan preActionsSqlPlan(); + public abstract SqlPlan dryRunPreActionsSqlPlan(); + public abstract Optional initializeLockSqlPlan(); public abstract Optional acquireLockSqlPlan(); @@ -53,6 +55,8 @@ public abstract class GeneratorResultAbstract public abstract SqlPlan ingestSqlPlan(); + public abstract SqlPlan dryRunSqlPlan(); + public abstract Optional ingestDataSplitRange(); public abstract SqlPlan metadataIngestSqlPlan(); @@ -74,6 +78,11 @@ public List preActionsSql() return preActionsSqlPlan().getSqlList(); } + public List dryRunPreActionsSql() + { + return dryRunPreActionsSqlPlan().getSqlList(); + } + public List initializeLockSql() { return initializeLockSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); @@ -99,6 +108,11 @@ public List ingestSql() .orElseGet(ingestSqlPlan()::getSqlList); } + public List dryRunSql() + { + return dryRunSqlPlan().getSqlList(); + } + public List metadataIngestSql() { return metadataIngestSqlPlan().getSqlList(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java 
index ed0523fc0a3..f71e302b3da 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -219,6 +219,10 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan preActionsLogicalPlan = planner.buildLogicalPlanForPreActions(resources); SqlPlan preActionsSqlPlan = transformer.generatePhysicalPlan(preActionsLogicalPlan); + // dry-run pre-actions + LogicalPlan dryRunPreActionsLogicalPlan = planner.buildLogicalPlanForDryRunPreActions(resources); + SqlPlan dryRunPreActionsSqlPlan = transformer.generatePhysicalPlan(dryRunPreActionsLogicalPlan); + // initialize-lock LogicalPlan initializeLockLogicalPlan = planner.buildLogicalPlanForInitializeLock(resources); Optional initializeLockSqlPlan = Optional.empty(); @@ -272,6 +276,10 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan ingestLogicalPlan = planner.buildLogicalPlanForIngest(resources); SqlPlan ingestSqlPlan = transformer.generatePhysicalPlan(ingestLogicalPlan); + // dry-run + LogicalPlan dryRunLogicalPlan = planner.buildLogicalPlanForDryRun(resources); + SqlPlan dryRunSqlPlan = transformer.generatePhysicalPlan(dryRunLogicalPlan); + // metadata ingest LogicalPlan metaDataIngestLogicalPlan = planner.buildLogicalPlanForMetadataIngest(resources); SqlPlan metaDataIngestSqlPlan = transformer.generatePhysicalPlan(metaDataIngestLogicalPlan); @@ -297,11 +305,13 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann return GeneratorResult.builder() .preActionsSqlPlan(preActionsSqlPlan) + .dryRunPreActionsSqlPlan(dryRunPreActionsSqlPlan) .initializeLockSqlPlan(initializeLockSqlPlan) .acquireLockSqlPlan(acquireLockSqlPlan) .schemaEvolutionSqlPlan(schemaEvolutionSqlPlan) .schemaEvolutionDataset(schemaEvolutionDataset) .ingestSqlPlan(ingestSqlPlan) + .dryRunSqlPlan(dryRunSqlPlan) .postActionsSqlPlan(postActionsSqlPlan) .postCleanupSqlPlan(postCleanupSqlPlan) .metadataIngestSqlPlan(metaDataIngestSqlPlan) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 60a5310c55d..04d5197117f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -259,6 +259,18 @@ public SchemaEvolutionResult evolve() return schemaEvolveResult; } + /* + - Perform dry run of Ingestion - only supported for 
Bulk Load at the moment
+    */
+    public void dryRun()
+    {
+        LOGGER.info("Invoked dryRun method, will perform the dryRun");
+        validateDatasetsInitialization();
+        // TODO invoke dry run
+        LOGGER.info("DryRun completed");
+    }
+
+
     /*
        - Perform ingestion from staging to main dataset based on the Ingest mode, executes in current transaction
     */
@@ -490,6 +502,18 @@ private List<IngestorResult> ingest(List<DataSplitRange> dataSplitRanges, Schema
         }
     }
 
+    private void performDryRun()
+    {
+        if (enrichedIngestMode instanceof BulkLoad)
+        {
+            relationalSink().performDryRun(executor, generatorResult.dryRunSqlPlan());
+        }
+        else
+        {
+            throw new RuntimeException("Dry run is not supported for this ingest mode: " + enrichedIngestMode.getClass().getSimpleName());
+        }
+    }
+
     private List<IngestorResult> performFullIngestion(RelationalConnection connection, Datasets datasets, List<DataSplitRange> dataSplitRanges)
     {
         // 1. init
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java
index 8e8e509abf3..7179414ca14 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java
@@ -81,6 +81,21 @@ public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan)
         return resultSetList;
     }
 
+    @Override
+    public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan, int rows)
+    {
+        List<TabularData> resultSetList = new ArrayList<>();
+        for (String sql : physicalPlan.getSqlList())
+        {
+            List<Map<String, Object>> queryResult = relationalExecutionHelper.executeQuery(sql, rows);
+            if (!queryResult.isEmpty())
+            {
+                resultSetList.add(new TabularData(queryResult));
+            }
+        }
+        return resultSetList;
+    }
+
     @Override
     public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map<String, PlaceholderValue> placeholderKeyValues)
     {
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java
index f1136234cbb..723927863ec 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java
@@ -470,6 +470,42 @@ public void executeStatements(List<String> sqls)
         }
     }
 
+    @Override
+    public List<Map<String, Object>> executeQuery(String sql, int rows)
+    {
+        if (this.transactionManager != null)
+        {
+            return
this.transactionManager.convertResultSetToList(sql, rows); + } + else + { + JdbcTransactionManager txManager = null; + try + { + txManager = new JdbcTransactionManager(connection); + return txManager.convertResultSetToList(sql, rows); + } + catch (Exception e) + { + throw new RuntimeException("Error executing SQL query: " + sql, e); + } + finally + { + if (txManager != null) + { + try + { + txManager.close(); + } + catch (SQLException e) + { + LOGGER.error("Error closing transaction manager.", e); + } + } + } + } + } + @Override public List> executeQuery(String sql) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java index 09b25ea1c2b..2dfa4abb349 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java @@ -83,19 +83,7 @@ public List> convertResultSetToList(String sql) { while (resultSet.next()) { - ResultSetMetaData metaData = resultSet.getMetaData(); - int columnCount = resultSet.getMetaData().getColumnCount(); - Map row = new HashMap<>(); - for (int i = 1; i <= columnCount; i++) - { - Object value = resultSet.getObject(i); - if (metaData.getColumnTypeName(i).equalsIgnoreCase("JSON") && value instanceof byte[]) - { - value = new String((byte[]) value, StandardCharsets.UTF_8); - } - row.put(metaData.getColumnName(i), value); - } - resultList.add(row); + extractResults(resultList, resultSet); } } return resultList; @@ -105,4 +93,43 @@ public List> convertResultSetToList(String sql) throw new RuntimeException(e); } } + + public List> convertResultSetToList(String sql, int rows) + { + try + { + List> resultList = new ArrayList<>(); + try (ResultSet resultSet = this.statement.executeQuery(sql)) + { + int iter = 0; + while (resultSet.next() && iter < rows) + { + iter++; + extractResults(resultList, resultSet); + } + } + return resultList; + } + catch (SQLException e) + { + throw new RuntimeException(e); + } + } + + private static void extractResults(List> resultList, ResultSet resultSet) throws SQLException + { + ResultSetMetaData metaData = resultSet.getMetaData(); + int columnCount = resultSet.getMetaData().getColumnCount(); + Map row = new HashMap<>(); + for (int i = 1; i <= columnCount; i++) + { + Object value = resultSet.getObject(i); + if (metaData.getColumnTypeName(i).equalsIgnoreCase("JSON") && value instanceof byte[]) + { + value = new String((byte[]) value, StandardCharsets.UTF_8); + } + row.put(metaData.getColumnName(i), value); + } + resultList.add(row); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java 
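
The row-capped convertResultSetToList above stops reading after `rows` results, but the query itself still runs to completion in the database. A driver-side alternative (not what this patch does, just a minimal sketch against the plain JDBC API) is java.sql.Statement#setMaxRows, which asks the driver to cap the result set:

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    final class RowCappedQuery
    {
        // Minimal sketch: cap the result size in the JDBC driver instead of the read loop.
        static int countRows(Connection connection, String sql, int rows) throws SQLException
        {
            try (Statement statement = connection.createStatement())
            {
                statement.setMaxRows(rows); // 0 means unlimited
                try (ResultSet resultSet = statement.executeQuery(sql))
                {
                    int count = 0;
                    while (resultSet.next())
                    {
                        count++;
                    }
                    return count;
                }
            }
        }
    }

The read-loop approach in the patch has the advantage of working uniformly across helpers that do not expose the underlying Statement.
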
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index d2db5bcf01e..b2d40e72e31 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -131,7 +131,9 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoBulkLoadEventId() th GeneratorResult operations = generator.generateOperations(datasets); List preActionsSql = operations.preActionsSql(); + List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); + List dryRunSql = operations.dryRunSql(); Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + @@ -146,7 +148,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoBulkLoadEventId() th Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); - + Assertions.assertEquals(0, dryRunPreActionsSql.size()); + Assertions.assertEquals(0, dryRunSql.size()); // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 040d23b084a..96c0c14b2ab 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -111,6 +111,18 @@ public class SnowflakeSink extends AnsiSqlSink private static final String FIRST_ERROR = "first_error"; private static final String FIRST_ERROR_COLUMN_NAME = "first_error_column_name"; + private static final String ERROR = "ERROR"; + private static final String FILE_WITH_ERROR = "FILE"; + private static final String LINE = "LINE"; + private static final String CHARACTER = "CHARACTER"; + private static final String BYTE_OFFSET = "BYTE_OFFSET"; + private static final String CATEGORY = "CATEGORY"; + private static final String COLUMN_NAME = "COLUMN_NAME"; + private static final String ROW_NUMBER = "ROW_NUMBER"; + private static final String ROW_START_LINE = 
"ROW_START_LINE"; + + private static final String REJECTED_RECORD = "REJECTED_RECORD"; + static { Set capabilities = new HashSet<>(); @@ -119,6 +131,7 @@ public class SnowflakeSink extends AnsiSqlSink capabilities.add(Capability.IMPLICIT_DATA_TYPE_CONVERSION); capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.TRANSFORM_WHILE_COPY); + capabilities.add(Capability.DRY_RUN); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); @@ -233,6 +246,26 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } } + public void performDryRun(Executor executor, SqlPlan dryRunSqlPlan) + { + List results = executor.executePhysicalPlanAndGetResults(dryRunSqlPlan, 25); + List> resultSets = results.get(0).getData(); + for (Map row: resultSets) + { + Object error = row.get(ERROR); + Object file = row.get(FILE_WITH_ERROR); + Object line = row.get(LINE); + Object character = row.get(CHARACTER); + Object byteOffset = row.get(BYTE_OFFSET); + Object category = row.get(CATEGORY); + Object columnName = row.get(COLUMN_NAME); + Object rowNumber = row.get(ROW_NUMBER); + Object rowStartLine = row.get(ROW_START_LINE); + Object rejectedRecord = row.get(REJECTED_RECORD); + } + } + + @Override public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java index c476b8ec0c7..2971fc677a5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; @@ -38,22 +39,29 @@ public VisitorResult visit(PhysicalPlanNode prev, Copy current, VisitorContext c { SnowflakeStagedFilesDatasetProperties properties = (SnowflakeStagedFilesDatasetProperties) current.stagedFilesDatasetProperties(); CopyStatement copyStatement = new CopyStatement(); - setCopyStatementProperties(properties, copyStatement); + setCopyStatementProperties(properties, copyStatement, current); prev.push(copyStatement); List logicalPlanNodes = new ArrayList<>(); logicalPlanNodes.add(current.sourceDataset()); logicalPlanNodes.add(current.targetDataset()); - logicalPlanNodes.addAll(current.fields()); - + if 
(!current.fields().isEmpty()) + { + logicalPlanNodes.addAll(current.fields()); + } return new VisitorResult(copyStatement, logicalPlanNodes); } - private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProperties properties, CopyStatement copyStatement) + private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProperties properties, CopyStatement copyStatement, Copy current) { copyStatement.setFilePatterns(properties.filePatterns()); copyStatement.setFilePaths(properties.filePaths()); + if (current.dryRun()) + { + copyStatement.setValidationMode("RETURN_ERRORS"); + } + // Add default option into the map Map copyOptions = new HashMap<>(properties.copyOptions()); if (!copyOptions.containsKey("ON_ERROR") && !copyOptions.containsKey("on_error")) @@ -74,8 +82,13 @@ private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProper else if (format instanceof StandardFileFormat) { StandardFileFormat standardFileFormat = (StandardFileFormat) format; + Map formatOptions = new HashMap<>(standardFileFormat.formatOptions()); + if (current.dryRun() && standardFileFormat.formatType().equals(FileFormatType.CSV)) + { + formatOptions.put("ERROR_ON_COLUMN_COUNT_MISMATCH", false); + } copyStatement.setFileFormatType(standardFileFormat.formatType()); - copyStatement.setFileFormatOptions(standardFileFormat.formatOptions()); + copyStatement.setFileFormatOptions(formatOptions); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java index 6560bbc0edb..b2aa09a3f83 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java @@ -15,9 +15,11 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.statements; import org.finos.legend.engine.persistence.components.common.FileFormatType; +import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.expressions.table.StagedFilesTable; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.TableLike; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DMLStatement; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.SelectStatement; import 
org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Field;
@@ -40,7 +42,7 @@ public class CopyStatement implements DMLStatement
 {
     private Table table;
     private final List<Field> columns;
-    private SelectStatement selectStatement;
+    private TableLike srcTable;
     private List<String> filePatterns;
     private List<String> filePaths;
     private String userDefinedFileFormatName;
@@ -48,35 +50,51 @@ public class CopyStatement implements DMLStatement
     private Map<String, Object> fileFormatOptions;
     private Map<String, Object> copyOptions;
 
+    private String validationMode;
+
     public CopyStatement()
     {
         this.columns = new ArrayList<>();
     }
 
-    public CopyStatement(Table table, List<Field> columns, SelectStatement selectStatement)
+    public CopyStatement(Table table, List<Field> columns, TableLike srcTable)
     {
         this.table = table;
         this.columns = columns;
-        this.selectStatement = selectStatement;
+        this.srcTable = srcTable;
     }
 
     /*
      Copy GENERIC PLAN for Snowflake:
+
+     Standard data load
+     --------------------------------
+     COPY INTO [<namespace>.]<table_name>
+          FROM { internalStage | externalStage | externalLocation }
+     [ FILES = ( '<file_name>' [ , '<file_name>' ] [ , ... ] ) ]
+     [ PATTERN = '<regex_pattern>' ]
+     [ FILE_FORMAT = ( { FORMAT_NAME = '[<namespace>.]<file_format_name>' |
+                         TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] } ) ]
+     [ copyOptions ]
+     [ VALIDATION_MODE = RETURN_<n>_ROWS | RETURN_ERRORS | RETURN_ALL_ERRORS ]
+
+     Data load with transformation
      --------------------------------
-    COPY INTO [<namespace>.]<table_name> (COLUMN_LIST)
-    FROM
-    ( SELECT [<alias>.]$<file_col_num>[.<element>] [ , [<alias>.]$<file_col_num>[.<element>] ... ]
-      FROM { internalStage | externalStage } )
-    [ FILES = ( '<file_name>' [ , '<file_name>' ] [ , ... ] ) ]
-    [ PATTERN = '<regex_pattern>' ]
-    [ FILE_FORMAT = ( { FORMAT_NAME = '[<namespace>.]<file_format_name>' |
-                        TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] } ) ]
-    [ copyOptions ]
+     COPY INTO [<namespace>.]<table_name> [ ( <col_name> [ , <col_name> ... ] ) ]
+     FROM ( SELECT [<alias>.]$<file_col_num>[.<element>] [ , [<alias>.]$<file_col_num>[.<element>] ... ]
+            FROM { internalStage | externalStage } )
+     [ FILES = ( '<file_name>' [ , '<file_name>' ] [ , ... ] ) ]
+     [ PATTERN = '<regex_pattern>' ]
+     [ FILE_FORMAT = ( { FORMAT_NAME = '[<namespace>.]<file_format_name>' |
+                         TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] } ) ]
+     [ copyOptions ]
+
     --------------------------------
     */
 
     @Override
     public void genSql(StringBuilder builder) throws SqlDomException
     {
+        boolean dataLoadWithTransformation = srcTable instanceof SelectStatement;
         validate();
 
         builder.append("COPY INTO ");
@@ -101,9 +119,15 @@ public void genSql(StringBuilder builder) throws SqlDomException
 
         builder.append(WHITE_SPACE + Clause.FROM.get() + WHITE_SPACE);
 
-        builder.append(OPEN_PARENTHESIS);
-        selectStatement.genSql(builder);
-        builder.append(CLOSING_PARENTHESIS);
+        if (dataLoadWithTransformation)
+        {
+            builder.append(OPEN_PARENTHESIS);
+        }
+        srcTable.genSql(builder);
+        if (dataLoadWithTransformation)
+        {
+            builder.append(CLOSING_PARENTHESIS);
+        }
 
         // File Paths
         if (filePaths != null && !filePaths.isEmpty())
@@ -139,6 +163,12 @@ else if (fileFormatType != null)
             builder.append(WHITE_SPACE);
             addOptions(copyOptions, builder);
         }
+        // Add validation mode
+        if (StringUtils.notEmpty(validationMode))
+        {
+            builder.append(WHITE_SPACE);
+            builder.append(String.format("VALIDATION_MODE = '%s'", validationMode));
+        }
     }
 
@@ -181,13 +211,17 @@ else if (node instanceof Field)
         }
         else if (node instanceof SelectStatement)
         {
-            selectStatement = (SelectStatement) node;
+            srcTable = (SelectStatement) node;
+        }
+        else if (node instanceof StagedFilesTable)
+        {
+            srcTable = (StagedFilesTable) node;
         }
     }
 
     void validate() throws SqlDomException
     {
-        if (selectStatement == null)
+        if (srcTable == null)
         {
-            throw new SqlDomException("selectStatement is mandatory for Copy Table Command");
+            throw new SqlDomException("srcTable is mandatory for Copy Table Command");
         }
@@ -196,6 +230,11 @@ void validate() throws SqlDomException
         {
             throw new SqlDomException("table is mandatory for Copy Table Command");
         }
+
+        if (StringUtils.notEmpty(validationMode) && srcTable instanceof SelectStatement)
+        {
+            throw new SqlDomException("VALIDATION_MODE is not supported for Data load with transformation");
+        }
     }
 
     public void setFilePatterns(List<String> filePatterns)
@@ -223,8 +262,13 @@ public void setFileFormatOptions(Map<String, Object> fileFormatOptions)
         this.fileFormatOptions = fileFormatOptions;
     }
 
+    public void setValidationMode(String validationMode)
+    {
+        this.validationMode = validationMode;
+    }
+
     public void setCopyOptions(Map<String, Object> copyOptions)
     {
         this.copyOptions = copyOptions;
     }
-}
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java
index d27da8f8174..80d8cc42d10 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java
@@ -115,7 +115,9 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived()
         GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset));
 
         List<String> preActionsSql = 
operations.preActionsSql(); + List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); + List dryRunSql = operations.dryRunSql(); List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); @@ -132,9 +134,19 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() String expectedMetadataIngestSql = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + "(SELECT 'my_name',{NEXT_BATCH_ID},'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"task123\",\"file_patterns\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation\"" + + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; + String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation\" FROM my_location " + + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + + "ON_ERROR = 'ABORT_STATEMENT' " + + "VALIDATION_MODE = 'RETURN_ERRORS'"; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); + Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); + Assertions.assertEquals(expectedDryRunSql, dryRunSql.get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); @@ -156,7 +168,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .stagedFilesDatasetProperties( SnowflakeStagedFilesDatasetProperties.builder() .location("my_location") - .fileFormat(StandardFileFormat.builder().formatType(FileFormatType.CSV).build()) + .fileFormat(StandardFileFormat.builder().formatType(FileFormatType.AVRO).build()) .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col3, col4)).build()) .alias("t") @@ -180,7 +192,9 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); List preActionsSql = operations.preActionsSql(); + List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); + List dryRunSql = operations.dryRunSql(); List metaIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); @@ -191,17 +205,26 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() "(SELECT t.$4 as \"col_bigint\",TO_VARIANT(PARSE_JSON(t.$5)) as \"col_variant\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MY_NAME') " + "FROM my_location as t) " + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + - "FILE_FORMAT = (TYPE = 'CSV') " + + "FILE_FORMAT = (TYPE = 'AVRO') " + "ON_ERROR = 'ABORT_STATEMENT'"; String expectedMetaIngestSql = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\", \"additional_metadata\") " + - "(SELECT 'my_name',(SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MY_NAME')," + - "'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + - "PARSE_JSON('{\"event_id\":\"task123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}')," + - "PARSE_JSON('{\"watermark\":\"my_watermark_value\"}'))"; + "(SELECT 'my_name',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MY_NAME')," + + "'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + + "PARSE_JSON('{\"event_id\":\"task123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}')," + + "PARSE_JSON('{\"watermark\":\"my_watermark_value\"}'))"; + String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation\"" + + "(\"col_bigint\" BIGINT,\"col_variant\" VARIANT)"; + String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation\" FROM my_location " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + + "FILE_FORMAT = (TYPE = 'AVRO') " + + "ON_ERROR = 'ABORT_STATEMENT' " + + "VALIDATION_MODE = 'RETURN_ERRORS'"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetaIngestSql, metaIngestSql.get(0)); + Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); + Assertions.assertEquals(expectedDryRunSql, dryRunSql.get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); @@ -243,7 +266,9 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); List preActionsSql = operations.preActionsSql(); + List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); + List dryRunSql = operations.dryRunSql(); List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); @@ -264,9 +289,19 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() "(SELECT 'MY_NAME',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME')," + "'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME_VALIDATION\"" + + "(\"COL_INT\" INTEGER,\"COL_INTEGER\" INTEGER)"; + String expectedDryRunSql = "COPY INTO \"MY_DB\".\"MY_NAME_VALIDATION\" FROM my_location " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + + "FILE_FORMAT = (FORMAT_NAME = 'my_file_format') " + + "ON_ERROR = 'ABORT_STATEMENT' " + + "VALIDATION_MODE = 'RETURN_ERRORS'"; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); + Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); + Assertions.assertEquals(expectedDryRunSql, dryRunSql.get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); 
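        // Note on the dry-run expectations above: per Snowflake's documented COPY semantics,
        // VALIDATION_MODE = 'RETURN_ERRORS' makes the COPY validate the staged files and return
        // the errors a load would raise, without actually loading any rows -- which is why the
        // dry-run COPY can safely target the temporary "_VALIDATION" table, and why CopyVisitor
        // relaxes ERROR_ON_COLUMN_COUNT_MISMATCH for CSV dry runs so that all errors are surfaced.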
Assertions.assertNull(statsSql.get(ROWS_DELETED)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java index 78877a8d46e..d80c4b42c2a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java @@ -119,6 +119,50 @@ void testCopyStatementWithPatternAndFileFormatAndForceOption() throws SqlDomExce assertEquals(expectedStr, sql1); } + + @Test + void testCopyStatementWithStandardDataLoad() throws SqlDomException + { + Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); + StagedFilesTable stagedFiles = new StagedFilesTable("@my_stage"); + + CopyStatement copyStatement = new CopyStatement(); + copyStatement.push(table); + copyStatement.push(stagedFiles); + copyStatement.setFilePaths(Arrays.asList("path1", "path2")); + Map fileFormatOptions = new HashMap<>(); + fileFormatOptions.put("error_on_column_count_mismatch", false); + copyStatement.setFileFormatType(FileFormatType.CSV); + copyStatement.setFileFormatOptions(fileFormatOptions); + + String sql = genSqlIgnoringErrors(copyStatement); + String expectedSql = "COPY INTO \"mydb\".\"mytable1\" FROM @my_stage FILES = ('path1', 'path2') " + + "FILE_FORMAT = (TYPE = 'CSV', error_on_column_count_mismatch = false)"; + assertEquals(expectedSql, sql); + } + + @Test + void testCopyStatementWithStandardDataLoadAndValidate() throws SqlDomException + { + Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); + StagedFilesTable stagedFiles = new StagedFilesTable("@my_stage"); + + CopyStatement copyStatement = new CopyStatement(); + copyStatement.push(table); + copyStatement.push(stagedFiles); + copyStatement.setFilePaths(Arrays.asList("path1", "path2")); + Map fileFormatOptions = new HashMap<>(); + fileFormatOptions.put("error_on_column_count_mismatch", false); + copyStatement.setFileFormatType(FileFormatType.CSV); + copyStatement.setFileFormatOptions(fileFormatOptions); + copyStatement.setValidationMode("RETURN_ERRORS"); + + String sql = genSqlIgnoringErrors(copyStatement); + String expectedSql = "COPY INTO \"mydb\".\"mytable1\" FROM @my_stage FILES = ('path1', 'path2') " + + "FILE_FORMAT = (TYPE = 'CSV', error_on_column_count_mismatch = false) VALIDATION_MODE = 'RETURN_ERRORS'"; + assertEquals(expectedSql, sql); + } + public static String genSqlIgnoringErrors(SqlGen item) { StringBuilder builder = new StringBuilder(); @@ -132,4 +176,4 @@ public static String genSqlIgnoringErrors(SqlGen item) } return builder.toString(); } -} +} \ No newline at end of file From 9e93ddce447c49abb20639b79c6745a5cbebc8a1 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Thu, 15 Feb 2024 17:09:55 +0530 Subject: [PATCH 
05/32] Changes for Data Quality Errors - Duplicates and Data Version Error --- ....java => DedupAndVersionErrorSqlType.java} | 6 +- .../DeriveDataErrorCheckLogicalPlan.java | 4 +- .../DeriveDataErrorsLogicalPlan.java | 107 +++++++++++++++++ .../datasets/SelectionAbstract.java | 2 + .../components/planner/Planner.java | 53 +++++++-- .../ansi/sql/visitors/SelectionVisitor.java | 14 ++- .../components/AnsiTestArtifacts.java | 16 +++ .../nontemporal/NontemporalDeltaTest.java | 16 ++- .../nontemporal/NontemporalSnapshotTest.java | 19 ++- .../UnitemporalDeltaBatchIdBasedTest.java | 13 ++- ...temporalDeltaBatchIdDateTimeBasedTest.java | 6 +- .../UnitemporalDeltaDateTimeBasedTest.java | 8 +- .../UnitemporalSnapshotBatchIdBasedTest.java | 11 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 16 ++- .../UnitemporalSnapshotDateTimeBasedTest.java | 17 ++- .../ingestmode/BigQueryTestArtifacts.java | 12 ++ .../UnitemporalDeltaBatchIdBasedTest.java | 6 +- .../UnitemporalSnapshotBatchIdBasedTest.java | 9 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 12 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 13 ++- .../api/GeneratorResultAbstract.java | 6 +- .../api/RelationalGeneratorAbstract.java | 8 +- .../api/RelationalIngestorAbstract.java | 56 ++++++--- .../exception/DataQualityException.java | 34 ++++++ .../relational/sqldom/common/Clause.java | 1 + .../schemaops/statements/SelectStatement.java | 46 +++++++- .../sqldom/schemaops/ConditionTest.java | 6 +- .../sqldom/schemaops/InsertTest.java | 3 +- .../schemaops/SelectExpressionTest.java | 52 ++++++--- .../sqldom/schemaops/UpdateStatementTest.java | 9 +- .../persistence/components/TestUtils.java | 28 +++++ .../versioning/TestDedupAndVersioning.java | 109 +++++++++++++++--- .../ingestmode/MemsqlTestArtifacts.java | 12 ++ .../ingestmode/NontemporalSnapshotTest.java | 17 ++- .../UnitemporalDeltaBatchIdBasedTest.java | 5 +- .../UnitemporalSnapshotBatchIdBasedTest.java | 9 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 12 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 13 ++- .../sqldom/schemaops/CopyStatementTest.java | 4 +- 39 files changed, 628 insertions(+), 162 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/{DedupAndVersionErrorStatistics.java => DedupAndVersionErrorSqlType.java} (86%) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorSqlType.java similarity index 86% rename from 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorSqlType.java index 60c193d7938..adda9e48e60 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorSqlType.java @@ -14,8 +14,10 @@ package org.finos.legend.engine.persistence.components.common; -public enum DedupAndVersionErrorStatistics +public enum DedupAndVersionErrorSqlType { MAX_DUPLICATES, - MAX_DATA_ERRORS; + DUPLICATE_ROWS, + MAX_DATA_ERRORS, + DATA_ERROR_ROWS; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java index 86cd4c59e4e..02afced5a4b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.versioning; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; @@ -73,7 +73,7 @@ public LogicalPlan visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersi private LogicalPlan getLogicalPlanForDataErrorCheck(String versionField) { - String maxDataErrorAlias = DedupAndVersionErrorStatistics.MAX_DATA_ERRORS.name(); + String maxDataErrorAlias = DedupAndVersionErrorSqlType.MAX_DATA_ERRORS.name(); String distinctRowCount = "legend_persistence_distinct_rows"; List pKsAndVersion = new ArrayList<>(); for (String pk: primaryKeys) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java 
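
The new plan below generalises DeriveDataErrorCheckLogicalPlan: rather than selecting only the MAX_DATA_ERRORS count, it selects the offending groups themselves. A "data error" here is two staged rows that share the primary keys and version but differ in some other column, so the plan groups by (pks, version), counts distinct remaining-column values per group, and keeps groups where that count exceeds 1, capped at sampleRowCount. On the ANSI sink this corresponds to SQL of roughly the following shape; the column and table names are hypothetical, and the real strings are asserted in the H2 TestDedupAndVersioning changes in this patch:

    // Illustrative shape only: "id", "version", "name" and the table name are stand-ins.
    String dataErrorsSql = "SELECT \"id\",\"version\",COUNT(DISTINCT \"name\") as \"legend_persistence_error_count\" "
            + "FROM \"temp_staging\" "
            + "GROUP BY \"id\",\"version\" "
            + "HAVING \"legend_persistence_error_count\" > 1 "
            + "LIMIT 20";
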
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java
new file mode 100644
index 00000000000..1ba88677201
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java
@@ -0,0 +1,107 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection;
+import org.finos.legend.engine.persistence.components.logicalplan.values.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DeriveDataErrorsLogicalPlan implements VersioningStrategyVisitor<LogicalPlan>
+{
+    private List<String> primaryKeys;
+    private List<String> remainingColumns;
+    private Dataset tempStagingDataset;
+    private int sampleRowCount;
+
+    public DeriveDataErrorsLogicalPlan(List<String> primaryKeys, List<String> remainingColumns, Dataset tempStagingDataset, int sampleRowCount)
+    {
+        this.primaryKeys = primaryKeys;
+        this.remainingColumns = remainingColumns;
+        this.tempStagingDataset = tempStagingDataset;
+        this.sampleRowCount = sampleRowCount;
+    }
+
+    @Override
+    public LogicalPlan visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+    {
+        return null;
+    }
+
+    @Override
+    public LogicalPlan visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+    {
+        if (maxVersionStrategy.performStageVersioning())
+        {
+            return getLogicalPlanForDataErrors(maxVersionStrategy.versioningField());
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    @Override
+    public LogicalPlan visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+    {
+        if (allVersionsStrategyAbstract.performStageVersioning())
+        {
+            return getLogicalPlanForDataErrors(allVersionsStrategyAbstract.versioningField());
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    private LogicalPlan getLogicalPlanForDataErrors(String versionField)
+    {
+        String distinctRowCount = "legend_persistence_error_count";
+        List<Value> pKsAndVersion = new ArrayList<>();
+        for (String pk: primaryKeys)
+        {
+            pKsAndVersion.add(FieldValue.builder().fieldName(pk).build());
+        }
+        pKsAndVersion.add(FieldValue.builder().fieldName(versionField).build());
+
+        List<Value> distinctValueFields = new ArrayList<>();
+        for (String field: remainingColumns)
+        {
+            distinctValueFields.add(FieldValue.builder().fieldName(field).build());
+        }
+
+        FunctionImpl countDistinct = FunctionImpl.builder()
+            .functionName(FunctionName.COUNT)
+            .addValue(FunctionImpl.builder().functionName(FunctionName.DISTINCT).addAllValue(distinctValueFields).build())
+            .alias(distinctRowCount)
+            .build();
+
+        Selection selectDataError = Selection.builder()
+            .source(tempStagingDataset)
+            .groupByFields(pKsAndVersion)
+            .addAllFields(pKsAndVersion)
+            .addFields(countDistinct)
+            .havingCondition(GreaterThan.of(FieldValue.builder().fieldName(distinctRowCount).build(), ObjectValue.of(1)))
+            .limit(sampleRowCount)
+            .build();
+
+        return LogicalPlan.builder().addOps(selectDataError).build();
+    }
+
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java
index 917525b2fc8..60530bc2288 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java
@@ -47,6 +47,8 @@ public interface SelectionAbstract extends Dataset, Operation
 
     Optional<List<Value>> groupByFields();
 
+    Optional<Condition> havingCondition();
+
     Optional<String> alias();
 
     Optional<Integer> limit();
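For reference, the Selection built by DeriveDataErrorsLogicalPlan above (group by the primary keys plus the version field, count distinct values of the remaining columns, keep only groups with more than one distinct value, cap at sampleRowCount) renders to SQL of the following shape, quoted from the ANSI test artifacts later in this patch:

    SELECT "id","name","version",COUNT(DISTINCT("digest")) as "legend_persistence_error_count"
    FROM "mydb"."staging_legend_persistence_temp_staging" as stage
    GROUP BY "id", "name", "version"
    HAVING "legend_persistence_error_count" > 1 LIMIT 20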
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java
index 8dd31a0524a..ba37e76e798 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java
@@ -19,8 +19,8 @@
 import java.util.stream.Collectors;
 
 import org.finos.legend.engine.persistence.components.common.Datasets;
+import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType;
 import org.finos.legend.engine.persistence.components.common.Resources;
-import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics;
 import org.finos.legend.engine.persistence.components.common.StatisticName;
 import org.finos.legend.engine.persistence.components.ingestmode.IngestMode;
 import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitor;
@@ -32,6 +32,7 @@
 import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan;
 import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory;
 import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field;
@@ -124,6 +125,12 @@ default String batchSuccessStatusValue()
         {
             return MetadataUtils.MetaTableStatus.DONE.toString();
         }
+
+        @Default
+        default int sampleRowCount()
+        {
+            return 20;
+        }
     }
 
     private final Datasets datasets;
@@ -390,41 +397,65 @@ public Map<StatisticName, LogicalPlan> buildLogicalPlanForPostRunStatistics(Reso
         return postRunStatisticsResult;
     }
 
-    public Map<DedupAndVersionErrorStatistics, LogicalPlan> buildLogicalPlanForDeduplicationAndVersioningErrorChecks(Resources resources)
+    public Map<DedupAndVersionErrorSqlType, LogicalPlan> buildLogicalPlanForDeduplicationAndVersioningErrorChecks(Resources resources)
     {
-        Map<DedupAndVersionErrorStatistics, LogicalPlan> dedupAndVersioningErrorChecks = new HashMap<>();
+        Map<DedupAndVersionErrorSqlType, LogicalPlan> dedupAndVersioningErrorChecks = new HashMap<>();
         addMaxDuplicatesErrorCheck(dedupAndVersioningErrorChecks);
         addDataErrorCheck(dedupAndVersioningErrorChecks);
         return dedupAndVersioningErrorChecks;
     }
 
-    protected void addMaxDuplicatesErrorCheck(Map<DedupAndVersionErrorStatistics, LogicalPlan> dedupAndVersioningErrorChecks)
+    protected void addMaxDuplicatesErrorCheck(Map<DedupAndVersionErrorSqlType, LogicalPlan> dedupAndVersioningErrorChecks)
     {
         if (ingestMode.deduplicationStrategy() instanceof FailOnDuplicates)
         {
+            FieldValue count = FieldValue.builder().datasetRef(tempStagingDataset().datasetReference()).fieldName(COUNT).build();
             FunctionImpl maxCount = FunctionImpl.builder()
                 .functionName(FunctionName.MAX)
-                .addValue(FieldValue.builder().datasetRef(tempStagingDataset().datasetReference()).fieldName(COUNT).build())
-                .alias(DedupAndVersionErrorStatistics.MAX_DUPLICATES.name())
+                .addValue(count)
+                .alias(DedupAndVersionErrorSqlType.MAX_DUPLICATES.name())
                 .build();
             Selection selectMaxDupsCount = Selection.builder()
                 .source(tempStagingDataset())
                 .addFields(maxCount)
                 .build();
             LogicalPlan maxDuplicatesCountPlan = LogicalPlan.builder().addOps(selectMaxDupsCount).build();
-            dedupAndVersioningErrorChecks.put(DedupAndVersionErrorStatistics.MAX_DUPLICATES, maxDuplicatesCountPlan);
+            dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.MAX_DUPLICATES, maxDuplicatesCountPlan);
+
+            /*
+            select pks from tempStagingDataset where COUNT > 1
+            */
+            List<Value> rowsToSelect = this.primaryKeys.stream().map(field -> FieldValue.builder().fieldName(field).build()).collect(Collectors.toList());
+            if (rowsToSelect.size() > 0)
+            {
+                rowsToSelect.add(FieldValue.builder().fieldName(COUNT).build());
+                Selection selectDuplicatesRows = Selection.builder()
+                    .source(tempStagingDataset())
+                    .addAllFields(rowsToSelect)
+                    .condition(GreaterThan.of(count, ObjectValue.of(1)))
+                    .limit(options().sampleRowCount())
+                    .build();
+                LogicalPlan selectDuplicatesRowsPlan = LogicalPlan.builder().addOps(selectDuplicatesRows).build();
+                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.DUPLICATE_ROWS, selectDuplicatesRowsPlan);
+            }
         }
     }
 
-    protected void addDataErrorCheck(Map<DedupAndVersionErrorStatistics, LogicalPlan> dedupAndVersioningErrorChecks)
+    protected void addDataErrorCheck(Map<DedupAndVersionErrorSqlType, LogicalPlan> dedupAndVersioningErrorChecks)
     {
         List<String> remainingColumns = getDigestOrRemainingColumns();
         if (ingestMode.versioningStrategy().accept(VersioningVisitors.IS_TEMP_TABLE_NEEDED))
         {
-            LogicalPlan logicalPlan = ingestMode.versioningStrategy().accept(new DeriveDataErrorCheckLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset()));
-            if (logicalPlan != null)
+            LogicalPlan logicalPlanForDataErrorCheck = ingestMode.versioningStrategy().accept(new DeriveDataErrorCheckLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset()));
+            if (logicalPlanForDataErrorCheck != null)
+            {
+                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS, logicalPlanForDataErrorCheck);
+            }
+
+            LogicalPlan logicalPlanForDataErrors = ingestMode.versioningStrategy().accept(new DeriveDataErrorsLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset(), options().sampleRowCount()));
+            if (logicalPlanForDataErrors != null)
             {
-                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS, logicalPlan);
+                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS, logicalPlanForDataErrors);
             }
         }
     }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java
index 5e574ac08de..a7bbf116d29 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java
@@ -41,8 +41,8 @@ public VisitorResult visit(PhysicalPlanNode prev, Selection current, VisitorCont
         prev.push(selectStatement);
 
         List<LogicalPlanNode> logicalPlanNodeList = new ArrayList<>();
-        List<Condition> conditions = new ArrayList<>();
-        current.condition().ifPresent(conditions::add);
+        List<Condition> whereConditions = new ArrayList<>();
+        current.condition().ifPresent(whereConditions::add);
 
         if (current.source().isPresent())
         {
@@ -57,14 +57,14 @@ select id from (select * from table where condition)
             {
                 DerivedDataset derivedDataset = (DerivedDataset) dataset;
                 Condition filterCondition = LogicalPlanUtils.getDatasetFilterCondition(derivedDataset);
-                conditions.add(filterCondition);
+                whereConditions.add(filterCondition);
                 logicalPlanNodeList.add(derivedDataset.datasetReference());
             }
             else if (dataset instanceof FilteredDataset)
             {
                 FilteredDataset filteredDataset = (FilteredDataset) dataset;
                 Condition filterCondition = filteredDataset.filter();
-                conditions.add(filterCondition);
+                whereConditions.add(filterCondition);
                 logicalPlanNodeList.add(filteredDataset.datasetReference());
             }
             else
@@ -89,12 +89,14 @@ else if (dataset instanceof FilteredDataset)
             selectStatement.setLimit(current.limit().get());
         }
 
-        if (!conditions.isEmpty())
+        if (!whereConditions.isEmpty())
         {
-            logicalPlanNodeList.add(And.of(conditions));
+            selectStatement.setHasWhereCondition(true);
+            logicalPlanNodeList.add(And.of(whereConditions));
         }
 
         current.groupByFields().ifPresent(logicalPlanNodeList::addAll);
+        current.havingCondition().ifPresent(logicalPlanNodeList::add);
         current.quantifier().ifPresent(logicalPlanNodeList::add);
 
         return new VisitorResult(selectStatement, logicalPlanNodeList);
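The SelectionVisitor change above separates the collected WHERE conditions from the new HAVING condition: `setHasWhereCondition(true)` tells the SelectStatement to emit a WHERE clause for the And-ed conditions, while `havingCondition()` is pushed as its own node after the GROUP BY fields. A hedged, illustrative sketch of building such a grouped selection; the builder calls are the ones used in the hunks above, but the field names, the `stagingDataset` variable, and the COUNT-over-a-single-field shape are assumptions for illustration (imports elided):

    // Illustrative only: a grouped selection whose filter belongs in HAVING, not WHERE.
    FunctionImpl errorCount = FunctionImpl.builder()
        .functionName(FunctionName.COUNT)                                // COUNT(...)
        .addValue(FieldValue.builder().fieldName("digest").build())      // assumed field
        .alias("legend_persistence_error_count")
        .build();
    Selection grouped = Selection.builder()
        .source(stagingDataset)                                          // assumed Dataset in scope
        .addFields(FieldValue.builder().fieldName("id").build(), errorCount)
        .groupByFields(Arrays.asList(FieldValue.builder().fieldName("id").build()))
        .havingCondition(GreaterThan.of(
            FieldValue.builder().fieldName("legend_persistence_error_count").build(),
            ObjectValue.of(1)))                                          // rendered as HAVING ... > 1
        .build();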
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java
index 7131c44d12b..db14f25cfc3 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java
@@ -605,6 +605,9 @@ public static String getDropTempTableQuery(String tableName)
     public static String maxDupsErrorCheckSql = "SELECT MAX(stage.\"legend_persistence_count\") as \"MAX_DUPLICATES\" FROM " +
             "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage";
 
+    public static String dupRowsSql = "SELECT \"id\",\"name\",\"legend_persistence_count\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " +
+            "WHERE stage.\"legend_persistence_count\" > 1 LIMIT 20";
+
     public static String dataErrorCheckSqlWithBizDateVersion = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " +
             "(SELECT COUNT(DISTINCT(\"digest\")) as \"legend_persistence_distinct_rows\" FROM " +
             "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage";
@@ -620,4 +623,17 @@
     public static String dataErrorCheckSqlWithBizDateAsVersionUpperCase = "SELECT MAX(\"LEGEND_PERSISTENCE_DISTINCT_ROWS\") as \"MAX_DATA_ERRORS\" " +
             "FROM (SELECT COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_DISTINCT_ROWS\" FROM " +
             "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\") as stage";
+
+    public static String dataErrorsSqlWithBizDateVersion = "SELECT \"id\",\"name\",\"biz_date\",COUNT(DISTINCT(\"digest\")) as \"legend_persistence_error_count\" FROM " +
+            "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20";
+
+    public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT \"ID\",\"NAME\",\"BIZ_DATE\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " +
+            "as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 20";
+
+    public static String dataErrorsSqlUpperCase = "SELECT \"ID\",\"NAME\",\"VERSION\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " +
+            "as stage GROUP BY \"ID\", \"NAME\", \"VERSION\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 20";
+
+    public static String dataErrorsSql = "SELECT \"id\",\"name\",\"version\",COUNT(DISTINCT(\"digest\")) as \"legend_persistence_error_count\" FROM " +
+            "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"version\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20";
+
 }
diff --git
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index 66ca6b07e82..942cf2cbd93 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; @@ -29,8 +29,7 @@ import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DUPLICATES; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*; public class NontemporalDeltaTest extends NontemporalDeltaTestCases { @@ -56,7 +55,7 @@ public void verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(GeneratorResult List initializeLockSql = operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + @@ -104,7 +103,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe List milestoningSqlList = operations.ingestSql(); List metadataIngestSqlList = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = 
stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + @@ -170,6 +169,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List 18972"; // Stats @@ -592,6 +597,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat Assertions.assertEquals(insertTempStagingTable, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index d3d3ef3e843..d867324e8c0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -29,6 +29,8 @@ import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { @@ -48,7 +50,7 @@ public void verifyNontemporalSnapshotNoAuditingNoDedupNoVersioning(GeneratorResu List initializeLockSql = operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map andVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map andVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + @@ -79,7 +81,7 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato List milestoningSqlList = operations.ingestSql(); List 
metadataIngestSqlList = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\", \"batch_id\") " + @@ -108,7 +110,7 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe List milestoningSqlList = operations.ingestSql(); List metadataIngestSqlList = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\", \"batch_id\") " + @@ -120,6 +122,9 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe "(SELECT COUNT(DISTINCT(\"amount\")) as \"legend_persistence_distinct_rows\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + "as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; + String dataErrorsSql = "SELECT \"id\",\"name\",\"biz_date\",COUNT(DISTINCT(\"amount\")) as \"legend_persistence_error_count\" FROM " + + "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSqlList.get(1)); Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTableWithCount, preActionsSqlList.get(2)); @@ -129,8 +134,10 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(dataErrorsSql, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); // Stats verifyStats(operations, "staging"); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java index ad272ce5473..ea11abec0c9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; @@ -27,7 +27,7 @@ import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*; public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @@ -128,7 +128,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET sink.\"batch_id_out\" = " + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -214,6 +214,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; @@ -367,6 +368,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, 
operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -413,6 +415,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -492,7 +495,8 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithFilterDuplicates, operations.deduplicationAndVersioningSql().get(1)); - Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(dupRowsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -533,6 +537,7 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta Assertions.assertEquals(expectedTempStagingCleanupQueryInUpperCase, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoTempStagingMaxVersion, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 55890efccd2..3f27df023f1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -26,8 +26,8 @@ import java.util.List; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DUPLICATES; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; public class UnitemporalDeltaBatchIdDateTimeBasedTest extends UnitmemporalDeltaBatchIdDateTimeBasedTestCases { @@ -240,6 +240,8 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; @@ -201,6 +202,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List initializeLockSql = operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -86,7 +88,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat List initializeLockSql = operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -109,7 +111,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, 
deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index e4b03057f4c..2dafcb4408a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -25,6 +25,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; + public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSnapshotBatchIdDateTimeBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; @@ -70,7 +73,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = 
'2000-01-01 00:00:00.000000' " + @@ -94,7 +97,9 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -120,7 +125,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = " + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE " + @@ -139,7 +144,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateAsVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateAsVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java index 4db9bb8ae4d..a0b04f4d4b4 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -25,6 +25,11 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.dataErrorsSqlWithBizDateVersion; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.dupRowsSql; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { @@ -41,7 +46,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorR List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + @@ -75,7 +80,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + @@ -98,8 +103,10 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingWithFilterDupsAndMaxVersion, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - 
Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java index 41dad3358bb..34328f91db4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java @@ -556,6 +556,18 @@ public class BigQueryTestArtifacts "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE stage.`legend_persistence_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT `ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + "as stage GROUP BY `ID`, `NAME`, `BIZ_DATE` HAVING `LEGEND_PERSISTENCE_ERROR_COUNT` > 1 LIMIT 20"; + + public static String dataErrorsSql = "SELECT `id`,`name`,`version`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + public static String getDropTempTableQuery(String tableName) { return String.format("DROP TABLE IF EXISTS %s", tableName); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 44a38605f2e..4e4513caa75 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -23,7 +24,8 @@ import java.util.List; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.MAX_DATA_ERRORS; public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @@ -386,6 +388,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -431,6 +434,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); 
Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java index 2d36914369f..85398256eb4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -24,6 +24,8 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBatchIdBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; @@ -69,7 +71,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + @@ -89,7 +91,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(2)); Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + 
Assertions.assertEquals(BigQueryTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index b9741ec5ca4..1800ffe2152 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -69,7 +69,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + @@ -89,7 +89,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSqlWithBizDateVersion, 
deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -120,7 +121,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink " + "SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + @@ -139,7 +140,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSqlWithBizDateVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index e51aa4cdfe3..42869079411 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -24,6 
+24,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { @@ -70,7 +73,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + @@ -97,8 +100,10 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 9bdb5f00337..7b049f18b2b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.api; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; 
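// The key type is renamed from DedupAndVersionErrorStatistics to DedupAndVersionErrorSqlType. Its
// definition is not part of this diff; judging from the constants used across the patch, it is
// presumably along the lines of (a sketch, not the actual source of the enum):
//   public enum DedupAndVersionErrorSqlType
//   {
//       MAX_DUPLICATES, DUPLICATE_ROWS, MAX_DATA_ERRORS, DATA_ERROR_ROWS
//   }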
+import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -67,7 +67,7 @@ public abstract class GeneratorResultAbstract public abstract Optional postCleanupSqlPlan(); - public abstract Map deduplicationAndVersioningErrorChecksSqlPlan(); + public abstract Map deduplicationAndVersioningErrorChecksSqlPlan(); public abstract Map preIngestStatisticsSqlPlan(); @@ -141,7 +141,7 @@ public Map preIngestStatisticsSql() k -> preIngestStatisticsSqlPlan().get(k).getSql())); } - public Map deduplicationAndVersioningErrorChecksSql() + public Map deduplicationAndVersioningErrorChecksSql() { return deduplicationAndVersioningErrorChecksSqlPlan().keySet().stream() .collect(Collectors.toMap( diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index f71e302b3da..1bd6afb4651 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.api; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; @@ -265,9 +265,9 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann deduplicationAndVersioningSqlPlan = Optional.of(transformer.generatePhysicalPlan(deduplicationAndVersioningLogicalPlan)); } - Map deduplicationAndVersioningErrorChecksLogicalPlan = planner.buildLogicalPlanForDeduplicationAndVersioningErrorChecks(resources); - Map deduplicationAndVersioningErrorChecksSqlPlan = new HashMap<>(); - for (DedupAndVersionErrorStatistics statistic : deduplicationAndVersioningErrorChecksLogicalPlan.keySet()) + Map deduplicationAndVersioningErrorChecksLogicalPlan = planner.buildLogicalPlanForDeduplicationAndVersioningErrorChecks(resources); + Map deduplicationAndVersioningErrorChecksSqlPlan = new HashMap<>(); + for (DedupAndVersionErrorSqlType statistic : deduplicationAndVersioningErrorChecksLogicalPlan.keySet()) { deduplicationAndVersioningErrorChecksSqlPlan.put(statistic, transformer.generatePhysicalPlan(deduplicationAndVersioningErrorChecksLogicalPlan.get(statistic))); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java
index 04d5197117f..17149221e63 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java
@@ -32,6 +32,7 @@
 import org.finos.legend.engine.persistence.components.relational.CaseConversion;
 import org.finos.legend.engine.persistence.components.relational.RelationalSink;
 import org.finos.legend.engine.persistence.components.relational.SqlPlan;
+import org.finos.legend.engine.persistence.components.relational.exception.DataQualityException;
 import org.finos.legend.engine.persistence.components.relational.sql.TabularData;
 import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen;
 import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer;
@@ -56,8 +57,10 @@
 import java.time.LocalDateTime;
 import java.util.*;
 
+import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*;
 import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.TABLE_IS_NON_EMPTY;
 import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.*;
+import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.retrieveValueAsLong;
 import static org.finos.legend.engine.persistence.components.relational.api.RelationalGeneratorAbstract.BULK_LOAD_BATCH_STATUS_PATTERN;
 import static org.finos.legend.engine.persistence.components.transformer.Transformer.TransformOptionsAbstract.DATE_TIME_FORMATTER;
@@ -427,24 +430,39 @@ public void dedupAndVersion()
         {
             LOGGER.info("Executing Deduplication and Versioning");
             executor.executePhysicalPlan(generatorResult.deduplicationAndVersioningSqlPlan().get());
-            Map<DedupAndVersionErrorStatistics, Object> errorStatistics = executeDeduplicationAndVersioningErrorChecks(executor, generatorResult.deduplicationAndVersioningErrorChecksSqlPlan());
-            /* Error Checks
-               1. if Dedup = fail on dups, Fail the job if count > 1
-               2. If versioining = Max Version/ All Versioin, Check for data error
-             */
-            Optional<Long> maxDuplicatesValue = retrieveValueAsLong(errorStatistics.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES));
-            Optional<Long> maxDataErrorsValue = retrieveValueAsLong(errorStatistics.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS));
-            if (maxDuplicatesValue.isPresent() && maxDuplicatesValue.get() > 1)
+
+            Map<DedupAndVersionErrorSqlType, SqlPlan> dedupAndVersionErrorSqlTypeSqlPlanMap = generatorResult.deduplicationAndVersioningErrorChecksSqlPlan();
+
+            // Error Check for Duplicates: if Dedup = fail on dups, Fail the job if count > 1
+            if (dedupAndVersionErrorSqlTypeSqlPlanMap.containsKey(MAX_DUPLICATES))
             {
-                String errorMessage = "Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy";
-                LOGGER.error(errorMessage);
-                throw new RuntimeException(errorMessage);
+                List<TabularData> result = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(MAX_DUPLICATES));
+                Optional<Object> obj = getFirstColumnValue(getFirstRowForFirstResult(result));
+                Optional<Long> maxDuplicatesValue = retrieveValueAsLong(obj.orElse(null));
+                if (maxDuplicatesValue.isPresent() && maxDuplicatesValue.get() > 1)
+                {
+                    // Find the duplicate rows
+                    TabularData duplicateRows = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(DUPLICATE_ROWS)).get(0);
+                    String errorMessage = "Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy";
+                    LOGGER.error(errorMessage);
+                    throw new DataQualityException(errorMessage, duplicateRows.getData());
+                }
             }
-            if (maxDataErrorsValue.isPresent() && maxDataErrorsValue.get() > 1)
+
+            // Error Check for Data Error: If versioning = Max Version/ All Versioning, Check for data error
+            if (dedupAndVersionErrorSqlTypeSqlPlanMap.containsKey(MAX_DATA_ERRORS))
             {
-                String errorMessage = "Encountered Data errors (same PK, same version but different data), hence failing the batch";
-                LOGGER.error(errorMessage);
-                throw new RuntimeException(errorMessage);
+                List<TabularData> result = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(MAX_DATA_ERRORS));
+                Optional<Object> obj = getFirstColumnValue(getFirstRowForFirstResult(result));
+                Optional<Long> maxDataErrorsValue = retrieveValueAsLong(obj.orElse(null));
+                if (maxDataErrorsValue.isPresent() && maxDataErrorsValue.get() > 1)
+                {
+                    // Find the data errors
+                    TabularData dataErrors = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(DATA_ERROR_ROWS)).get(0);
+                    String errorMessage = "Encountered Data errors (same PK, same version but different data), hence failing the batch";
+                    LOGGER.error(errorMessage);
+                    throw new DataQualityException(errorMessage, dataErrors.getData());
+                }
+            }
         }
     }
@@ -788,11 +806,11 @@ private Map<StatisticName, Object> executeStatisticsPhysicalPlan(Executor
-    private Map<DedupAndVersionErrorStatistics, Object> executeDeduplicationAndVersioningErrorChecks(Executor executor,
-                                                                                                      Map<DedupAndVersionErrorStatistics, SqlPlan> errorChecksPlan)
+    private Map<DedupAndVersionErrorSqlType, Object> executeDeduplicationAndVersioningErrorChecks(Executor executor,
+                                                                                                   Map<DedupAndVersionErrorSqlType, SqlPlan> errorChecksPlan)
     {
-        Map<DedupAndVersionErrorStatistics, Object> results = new HashMap<>();
-        for (Map.Entry<DedupAndVersionErrorStatistics, SqlPlan> entry: errorChecksPlan.entrySet())
+        Map<DedupAndVersionErrorSqlType, Object> results = new HashMap<>();
+        for (Map.Entry<DedupAndVersionErrorSqlType, SqlPlan> entry: errorChecksPlan.entrySet())
         {
             List<TabularData> result = executor.executePhysicalPlanAndGetResults(entry.getValue());
             Optional<Object> obj = getFirstColumnValue(getFirstRowForFirstResult(result));
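Both failure paths above now raise the new DataQualityException (introduced in the next file), which carries a sample of the offending rows alongside the failure message. A minimal consumption sketch; the ingestor handle and the SLF4J logger here are illustrative assumptions, not part of this patch:

    try
    {
        ingestor.dedupAndVersion(); // may throw DataQualityException as implemented above
    }
    catch (DataQualityException e)
    {
        LOGGER.error(e.getMessage());
        // getSampleRows() yields List<Map<String, Object>>; the generated row-sampling SQL caps it at 20 rows
        e.getSampleRows().forEach(row -> LOGGER.error("Offending row: {}", row));
    }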
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java
new file mode 100644
index 00000000000..84f0083f7dd
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java
@@ -0,0 +1,34 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.relational.exception;
+
+import java.util.List;
+import java.util.Map;
+
+public class DataQualityException extends RuntimeException
+{
+    private List<Map<String, Object>> sampleRows;
+
+    public List<Map<String, Object>> getSampleRows()
+    {
+        return sampleRows;
+    }
+
+    public DataQualityException(String message, List<Map<String, Object>> sampleRows)
+    {
+        super(message);
+        this.sampleRows = sampleRows;
+    }
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java
index 712a5da2699..fbbfba7b24c 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java
@@ -31,6 +31,7 @@ public enum Clause
     MERGE_INTO("MERGE INTO"),
     USING("USING"),
     WHERE("WHERE"),
+    HAVING("HAVING"),
     SET("SET"),
     EXISTS("EXISTS"),
     WHEN_MATCHED("WHEN MATCHED"),
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java
index 16d11fe9c9c..a3176a9c3e4 100644
---
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java @@ -43,24 +43,39 @@ public class SelectStatement extends SelectExpression implements DMLStatement private final List selectItems; private Long selectItemsSize; private final List tables; - private Condition condition; + private Condition whereCondition; private final List groupByFields; + private Condition havingCondition; private Integer limit; + private boolean hasWhereCondition; public SelectStatement() { this.selectItems = new ArrayList<>(); this.tables = new ArrayList<>(); this.groupByFields = new ArrayList<>(); + this.hasWhereCondition = false; } - public SelectStatement(Quantifier quantifier, List selectItems, List tables, Condition condition, List groupByFields) + public SelectStatement(Quantifier quantifier, List selectItems, List tables, Condition whereCondition) { this.quantifier = quantifier; this.selectItems = selectItems; this.tables = tables; - this.condition = condition; + this.whereCondition = whereCondition; + this.hasWhereCondition = whereCondition != null; + this.groupByFields = new ArrayList<>(); + } + + public SelectStatement(Quantifier quantifier, List selectItems, List tables, Condition whereCondition, List groupByFields, Condition havingCondition) + { + this.quantifier = quantifier; + this.selectItems = selectItems; + this.tables = tables; + this.whereCondition = whereCondition; this.groupByFields = groupByFields; + this.hasWhereCondition = whereCondition != null; + this.havingCondition = havingCondition; } /* @@ -107,10 +122,10 @@ public void genSql(StringBuilder builder) throws SqlDomException } // Add where clause - if (condition != null) + if (whereCondition != null) { builder.append(WHITE_SPACE + Clause.WHERE.get() + WHITE_SPACE); - condition.genSql(builder); + whereCondition.genSql(builder); } // Add group by clause @@ -130,6 +145,13 @@ public void genSql(StringBuilder builder) throws SqlDomException } } + // Add having clause + if (havingCondition != null) + { + builder.append(WHITE_SPACE + Clause.HAVING.get() + WHITE_SPACE); + havingCondition.genSql(builder); + } + // Add limit clause if (limit != null) { @@ -156,7 +178,14 @@ public void push(Object node) } else if (node instanceof Condition) { - condition = (Condition) node; + if (whereCondition == null && hasWhereCondition) + { + whereCondition = (Condition) node; + } + else + { + havingCondition = (Condition) node; + } } else if (node instanceof Quantifier) { @@ -216,4 +245,9 @@ public void setLimit(int limit) { this.limit = limit; } + + public void setHasWhereCondition(boolean hasWhereCondition) + { + this.hasWhereCondition = hasWhereCondition; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java index f785f09d51a..c6164052c06 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java @@ -125,8 +125,7 @@ void testExistsCondition() null, Collections.singletonList(item), Collections.singletonList(table), - condition, - Collections.emptyList()); + condition); Condition existsCondition = new ExistsCondition(selectExpression); String expected = "EXISTS (SELECT \"item1\" FROM \"mydb\".\"mytable\" WHERE \"item1\" = 1)"; @@ -161,8 +160,7 @@ void testInSelectCondition() null, Collections.singletonList(field2), Collections.singletonList(tableB), - null, - Collections.emptyList()); + null); String expected = "sink.\"col1\" IN (SELECT stage.\"col2\" FROM \"mydb\".\"mytable2\" as stage)"; Condition condition = new InCondition(field1, selectExpression); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java index 706cbd9e2f9..228aa82635c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java @@ -96,8 +96,7 @@ void testInsertWithSelect() null, Arrays.asList(item1, item2, item3, item4, item5, item6, item7, item8, item9, item10, item11, item12), Collections.singletonList(tableToSelect), - null, - Collections.emptyList()); + null); InsertStatement insertStatement = new InsertStatement(tableToInsert, columns, selectExpression); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java index aea7afd123d..36b0dbc8207 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java @@ -19,6 +19,7 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.quantifiers.DistinctQuantifier; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.Condition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.comparison.EqualityCondition; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.comparison.GreaterThanCondition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.comparison.NotEqualCondition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.logical.AndCondition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.select.SelectExpression; @@ -55,8 +56,7 @@ void genSqlForSimpleSelect() new DistinctQuantifier(), Arrays.asList(item1, item2), Collections.singletonList(table), - null, - Collections.emptyList()); + null); String sql1 = BaseTest.genSqlIgnoringErrors(selectExpression); String expected = "SELECT DISTINCT \"item1\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\""; @@ -73,8 +73,7 @@ void genSqlForSimpleSelectStarWithLimit() null, Collections.singletonList(new All(BaseTest.QUOTE_IDENTIFIER)), Collections.singletonList(table), - null, - Collections.emptyList()); + null); selectStatement.setLimit(10); String sql1 = BaseTest.genSqlIgnoringErrors(selectStatement); @@ -99,8 +98,7 @@ void genSqlForCondtionalSelect() null, Arrays.asList(item1, item2), Collections.singletonList(table), - condition, - Collections.emptyList()); + condition); String sql1 = BaseTest.genSqlIgnoringErrors(selectExpression); String expected = "SELECT \"item1\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" WHERE (\"item1\" = 100) AND (\"item2\" <> 50)"; @@ -127,8 +125,7 @@ void genSqlForInnerJoin() new DistinctQuantifier(), Arrays.asList(item1, item2, item3), Collections.singletonList(table), - null, - Collections.emptyList()); + null); String sql1 = BaseTest.genSqlIgnoringErrors(selectExpression); String expected = "SELECT DISTINCT A.\"id\",A.\"item2\",B.\"item3\" as \"my_item\" FROM \"mydb\".\"left\" as A INNER JOIN \"mydb\".\"right\" as B ON A.\"id\" = B.\"id\""; @@ -144,8 +141,7 @@ void genSqlForSelectSelectItemsMissing() new DistinctQuantifier(), Collections.emptyList(), Collections.singletonList(table), - null, - Collections.emptyList()); + null); try { BaseTest.genSql(selectExpression); @@ -164,19 +160,47 @@ void genSqlForSelectWithConditionAndGroupBy() Field item1 = new Field(null, "item1", BaseTest.QUOTE_IDENTIFIER, null); Field item2 = new Field(null, "item2", BaseTest.QUOTE_IDENTIFIER, "my_item"); - Condition condition = new NotEqualCondition(item2, new NumericalValue(50L, BaseTest.QUOTE_IDENTIFIER)); + Condition whereCondition = new NotEqualCondition(item2, new NumericalValue(50L, BaseTest.QUOTE_IDENTIFIER)); + Condition havingCondition = new GreaterThanCondition(new Field("count", BaseTest.QUOTE_IDENTIFIER), new NumericalValue(1L, BaseTest.QUOTE_IDENTIFIER)); Function countFunction = new Function(FunctionName.COUNT, Collections.singletonList(item1), BaseTest.QUOTE_IDENTIFIER); + countFunction.setAlias("count"); SelectExpression selectExpression = new SelectStatement( null, 
Arrays.asList(countFunction, item2), Collections.singletonList(table), - condition, - Collections.singletonList(item2)); + whereCondition, + Collections.singletonList(item2), + havingCondition); + + String sql = BaseTest.genSqlIgnoringErrors(selectExpression); + String expected = "SELECT COUNT(\"item1\") as \"count\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" WHERE \"item2\" <> 50 GROUP BY \"item2\" HAVING \"count\" > 1"; + assertEquals(expected, sql); + } + + @Test + void genSqlForSelectWithoutConditionAndGroupBy() + { + Table table = new Table("mydb", null, "mytable", null, BaseTest.QUOTE_IDENTIFIER); + + Field item1 = new Field(null, "item1", BaseTest.QUOTE_IDENTIFIER, null); + Field item2 = new Field(null, "item2", BaseTest.QUOTE_IDENTIFIER, "my_item"); + Condition havingCondition = new GreaterThanCondition(new Field("count", BaseTest.QUOTE_IDENTIFIER), new NumericalValue(1L, BaseTest.QUOTE_IDENTIFIER)); + Function countFunction = new Function(FunctionName.COUNT, Collections.singletonList(item1), BaseTest.QUOTE_IDENTIFIER); + countFunction.setAlias("count"); + + SelectExpression selectExpression = + new SelectStatement( + null, + Arrays.asList(countFunction, item2), + Collections.singletonList(table), + null, + Collections.singletonList(item2), + havingCondition); String sql = BaseTest.genSqlIgnoringErrors(selectExpression); - String expected = "SELECT COUNT(\"item1\"),\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" WHERE \"item2\" <> 50 GROUP BY \"item2\""; + String expected = "SELECT COUNT(\"item1\") as \"count\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" GROUP BY \"item2\" HAVING \"count\" > 1"; assertEquals(expected, sql); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java index 4827ff062ef..8121d0dd091 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java @@ -102,16 +102,14 @@ void genSqlForUpdateWithJoin() null, Collections.singletonList(new Field(joinTable.getAlias(), "col1", BaseTest.QUOTE_IDENTIFIER, null)), Collections.singletonList(joinTable), - pkMatchCondition, - Collections.emptyList()), + pkMatchCondition), BaseTest.QUOTE_IDENTIFIER)), new Pair<>(new Field("col2", BaseTest.QUOTE_IDENTIFIER), new SelectValue( new SelectStatement( null, Collections.singletonList(new Field(joinTable.getAlias(), "col2", BaseTest.QUOTE_IDENTIFIER, null)), Collections.singletonList(joinTable), - pkMatchCondition, - Collections.emptyList()), + pkMatchCondition), BaseTest.QUOTE_IDENTIFIER))); Condition whereCondition = new ExistsCondition( @@ -119,8 +117,7 @@ void genSqlForUpdateWithJoin() null, Collections.singletonList(new All(BaseTest.QUOTE_IDENTIFIER)), Collections.singletonList(joinTable), - 
pkMatchCondition, - Collections.emptyList())); + pkMatchCondition)); UpdateStatement query = new UpdateStatement(table, setPairs, whereCondition); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java index bd1b5a5fcea..869c8d3fb06 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java @@ -1497,6 +1497,34 @@ public static Dataset createDatasetWithUpdatedField(Dataset dataset, Field field return dataset.withSchema(dataset.schema().withFields(newFields)); } + public static void assertEquals(List> expectedList, List> actualList) + { + if (expectedList.size() != actualList.size()) + { + Assertions.fail("Size of expected List does not match actual List"); + } + + for (int i = 0; i < actualList.size(); i++) + { + Map expected = expectedList.get(i); + Map actual = actualList.get(i); + for (Map.Entry entry : expected.entrySet()) + { + Object actualObj = actual.get(entry.getKey()); + Object expectedObj = entry.getValue(); + if (expectedObj == null && actualObj != null) + { + Assertions.fail(String.format("Values mismatch. key: %s, actual value: %s, expected value: %s", entry.getKey(), actualObj, expectedObj)); + } + if (expectedObj != null && !expectedObj.toString().equals(actualObj.toString())) + { + Assertions.fail(String.format("Values mismatch. 
key: %s, actual value: %s, expected value: %s", entry.getKey(), actualObj, expectedObj)); + } + + } + } + } + private static List readCsvData(String path) throws IOException { List lines = new ArrayList<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java index 8cda4be62fe..fbd1db21ff2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java @@ -33,12 +33,15 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; +import org.finos.legend.engine.persistence.components.relational.exception.DataQualityException; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -174,9 +177,17 @@ void testNoDedupMaxVersioning() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + row1.put("legend_persistence_error_count", 2); + expectedSampleRows.add(row1); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -230,9 +241,17 @@ void testNoDedupAllVersion() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + row1.put("legend_persistence_error_count", 2); + expectedSampleRows.add(row1); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -309,9 +328,17 @@ void testFilterDupsMaxVersion() 
throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + row1.put("legend_persistence_error_count", 2); + expectedSampleRows.add(row1); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -372,9 +399,17 @@ void testFilterDupsAllVersion() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + row1.put("legend_persistence_error_count", 2); + expectedSampleRows.add(row1); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -398,9 +433,21 @@ void testFailOnDupsNoVersioning() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row1 = new HashMap<>(); + row1.put("name", "Andy"); + row1.put("id", 1); + row1.put("legend_persistence_count", 3); + Map row2 = new HashMap<>(); + row2.put("name", "Becky"); + row2.put("id", 2); + row2.put("legend_persistence_count", 2); + expectedSampleRows.add(row1); + expectedSampleRows.add(row2); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -436,9 +483,16 @@ void testFailOnDupsMaxVersionDoNotPerform() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + row.put("legend_persistence_count", 2); + expectedSampleRows.add(row); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -474,9 +528,16 @@ void testFailOnDupsMaxVersion() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on 
Duplicates is set as Deduplication strategy",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + row.put("legend_persistence_count", 2); + expectedSampleRows.add(row); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -515,9 +576,16 @@ void testFailOnDupsAllVersionDoNotPerform() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { + List> expectedSampleRows = new ArrayList<>(); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + row.put("legend_persistence_count", 2); + expectedSampleRows.add(row); Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } @@ -555,9 +623,16 @@ void testFailOnDupsAllVersion() throws Exception performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + List> expectedSampleRows = new ArrayList<>(); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + row.put("legend_persistence_count", 2); + expectedSampleRows.add(row); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java index 0ac9cceb98b..3ff5c9baa25 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java @@ -527,4 +527,16 @@ public class MemsqlTestArtifacts public static String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; + + public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE stage.`legend_persistence_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`digest`)) as 
`legend_persistence_error_count` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT `ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + "as stage GROUP BY `ID`, `NAME`, `BIZ_DATE` HAVING `LEGEND_PERSISTENCE_ERROR_COUNT` > 1 LIMIT 20"; + + public static String dataErrorsSql = "SELECT `id`,`name`,`version`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java index 03665971154..96f9b10b58a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -27,6 +27,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { String rowsDeleted = "SELECT COUNT(*) as `rowsDeleted` FROM `mydb`.`main` as sink"; @@ -81,7 +84,7 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe List milestoningSqlList = operations.ingestSql(); List metaIngestSqlList = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`, `batch_id`) " + @@ -93,6 +96,10 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe "(SELECT COUNT(DISTINCT(`amount`)) as `legend_persistence_distinct_rows` " 
+ "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`amount`)) as `legend_persistence_error_count` " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1)); Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithCount, preActionsSqlList.get(2)); @@ -103,8 +110,10 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); // Stats verifyStats(operations, "staging"); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 2f8df95c9c9..a6f5014a130 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -23,7 +23,8 @@ import java.util.List; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.MAX_DATA_ERRORS; public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @@ -386,6 +387,7 @@ public void 
verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -431,6 +433,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java index 81c42dab355..2b8f567bebd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; @@ -24,6 +24,8 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBatchIdBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; @@ -69,7 +71,7 @@ 
public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + @@ -89,7 +91,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(2)); Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(MemsqlTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 2d66fb08abf..bae77413ca7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; @@ -69,7 +69,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List 
deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + @@ -93,7 +93,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -120,7 +121,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = " + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE " + @@ -147,7 +148,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSqlWithBizDateVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git 
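The DATA_ERROR_ROWS entry asserted above complements MAX_DATA_ERRORS: rather than a single max count, it returns up to sampleRowCount offending primary keys for diagnostics. A minimal sketch of how a caller might surface those rows, assuming a plain JDBC connection to the sink (the helper method, connection handling and column names mirror the generated SQL above but are illustrative, not part of this patch):

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.Statement;

    private static void printDataErrorRows(Connection connection, GeneratorResult operations) throws Exception
    {
        String dataErrorRowsSql = operations.deduplicationAndVersioningErrorChecksSql()
            .get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS);
        try (Statement statement = connection.createStatement();
             ResultSet resultSet = statement.executeQuery(dataErrorRowsSql))
        {
            while (resultSet.next())
            {
                // one row per primary key carrying more than one distinct digest for the same version
                System.out.println(resultSet.getString("id") + "|" + resultSet.getString("name")
                    + "|errors=" + resultSet.getLong("legend_persistence_error_count"));
            }
        }
    }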
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index ab72f14d23e..dbabec09fdd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; @@ -24,6 +24,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { @@ -70,7 +73,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + @@ -97,8 +100,10 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, 
deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java index d80c4b42c2a..a10b0620bc5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java @@ -47,7 +47,7 @@ void testCopyStatementWithFilesAndStandardFileFormat() throws SqlDomException new StagedFilesField(QUOTE_IDENTIFIER, 3, "t", "field3"), new StagedFilesField(QUOTE_IDENTIFIER, 4, "t", "field4") ); - SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null, null); + SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null); Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); List columns = Arrays.asList( @@ -89,7 +89,7 @@ void testCopyStatementWithPatternAndFileFormatAndForceOption() throws SqlDomExce new StagedFilesField(QUOTE_IDENTIFIER, 1, "t", "field4","field4") ); - SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null, null); + SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null); Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); List columns = Arrays.asList( From c4f87e69e2003a9676d91b86b28d22fa792b76b0 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Fri, 16 Feb 2024 11:09:05 +0530 Subject: [PATCH 06/32] Expose sample Row count as a parameter --- .../engine/persistence/components/AnsiTestArtifacts.java | 2 +- .../relational/api/RelationalGeneratorAbstract.java | 7 +++++++ .../relational/api/RelationalIngestorAbstract.java | 6 ++++++ .../ingestmode/nontemporal/NontemporalDeltaTestCases.java | 1 + .../UnitmemporalDeltaBatchIdBasedTestCases.java | 1 + 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java index db14f25cfc3..700bc1aa1b7 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java @@ -631,7 +631,7 @@ public static String getDropTempTableQuery(String tableName) "as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 20"; public static String dataErrorsSqlUpperCase = "SELECT \"ID\",\"NAME\",\"VERSION\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + - "as stage GROUP BY \"ID\", \"NAME\", \"VERSION\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 20"; + "as stage GROUP BY \"ID\", \"NAME\", \"VERSION\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 10"; public static String dataErrorsSql = "SELECT \"id\",\"name\",\"version\",COUNT(DISTINCT(\"digest\")) as \"legend_persistence_error_count\" FROM " + "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"version\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index 1bd6afb4651..fa3da3f3d43 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -131,6 +131,12 @@ public String batchSuccessStatusValue() return MetadataUtils.MetaTableStatus.DONE.toString(); } + @Default + public int sampleRowCount() + { + return 20; + } + //---------- FIELDS ---------- public abstract IngestMode ingestMode(); @@ -149,6 +155,7 @@ protected PlannerOptions plannerOptions() .putAllAdditionalMetadata(additionalMetadata()) .bulkLoadEventIdValue(bulkLoadEventIdValue()) .batchSuccessStatusValue(batchSuccessStatusValue()) + .sampleRowCount(sampleRowCount()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 17149221e63..d7e2fe07c01 100644 --- 
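The sampleRowCount knob introduced here caps how many sample rows the DUPLICATE_ROWS and DATA_ERROR_ROWS queries return, which is why the uppercase artifact above now ends in LIMIT 10. A minimal configuration sketch, assuming the builder wiring shown in the test cases below (the ingest mode, sink accessor and datasets are illustrative placeholders, not part of this patch):

    RelationalGenerator generator = RelationalGenerator.builder()
        .ingestMode(ingestMode)              // any mode with deduplication/versioning enabled
        .relationalSink(MemSqlSink.get())    // illustrative sink
        .collectStatistics(true)
        .sampleRowCount(10)                  // defaults to 20 when omitted
        .build();
    GeneratorResult operations = generator.generateOperations(datasets);
    // the generated error-check SQL now ends with "... LIMIT 10"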
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -165,6 +165,12 @@ public String batchSuccessStatusValue() return MetadataUtils.MetaTableStatus.DONE.toString(); } + @Default + public int sampleRowCount() + { + return 20; + } + //---------- FIELDS ---------- public abstract IngestMode ingestMode(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java index 8b90ae0de7b..b9b5332c9bb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java @@ -336,6 +336,7 @@ void testNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) + .sampleRowCount(10) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java index c7a41bb0978..4cb7b8dd3ac 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java @@ -377,6 +377,7 @@ void testUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilte .cleanupStagingData(true) .caseConversion(CaseConversion.TO_UPPER) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) + .sampleRowCount(10) .build(); GeneratorResult operations = 
generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(operations); From 6385fac312ce0f25771cbbb738a9703f5dd3edff Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Tue, 20 Feb 2024 11:23:40 +0530 Subject: [PATCH 07/32] Dry run changes for Ingestor Mode --- .../StagedFilesDatasetProperties.java | 6 ++ .../components/planner/BulkLoadPlanner.java | 6 +- .../relational/ansi/AnsiSqlSink.java | 9 +-- .../components/relational/RelationalSink.java | 8 +-- .../relational/api/DataErrorAbstract.java | 48 ++++++++++++++++ .../relational/api/DryRunResultAbstract.java | 3 +- .../api/RelationalIngestorAbstract.java | 15 +++-- .../relational/snowflake/SnowflakeSink.java | 55 ++++++++++++++----- ...eStagedFilesDatasetPropertiesAbstract.java | 15 +++++ .../components/ingestmode/BulkLoadTest.java | 33 ++++------- 10 files changed, 138 insertions(+), 60 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java index 8dae01e0dc5..e15d8dc7623 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java @@ -24,6 +24,12 @@ public interface StagedFilesDatasetProperties List filePatterns(); + @Value.Derived + default boolean dryRunSupported() + { + return false; + } + @Value.Check default void validate() { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index ecdb551f577..5d348b6f47b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -119,7 +119,7 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) public LogicalPlan buildLogicalPlanForDryRun(Resources resources) { List operations = new ArrayList<>(); - if (capabilities.contains(Capability.DRY_RUN)) 
+ if (capabilities.contains(Capability.DRY_RUN) && stagedFilesDataset.stagedFilesDatasetProperties().dryRunSupported()) { Dataset validationDataset = getValidationDataset(); Copy copy = Copy.builder() @@ -207,7 +207,7 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources) { List operations = new ArrayList<>(); - if (capabilities.contains(Capability.DRY_RUN)) + if (capabilities.contains(Capability.DRY_RUN) && stagedFilesDataset.stagedFilesDatasetProperties().dryRunSupported()) { operations.add(Create.of(true, getValidationDataset())); } @@ -294,7 +294,7 @@ private Dataset getValidationDataset() .schema(stagedFilesDataset.schema()) .database(mainDataset().datasetReference().database()) .group(mainDataset().datasetReference().group()) - .name(tableName + UNDERSCORE + "validation") + .name(tableName + UNDERSCORE + "validation") // TODO legend_persistence .datasetAdditionalProperties(DatasetAdditionalProperties.builder().tableType(TableType.TEMPORARY).build()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 48b1f93640a..c4e6a054513 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -147,6 +147,7 @@ import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.TabularValuesVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.TruncateVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.WindowFunctionVisitor; +import org.finos.legend.engine.persistence.components.relational.api.DataError; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; @@ -156,11 +157,7 @@ import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; public class AnsiSqlSink extends RelationalSink { @@ -330,7 +327,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan) + public List performDryRun(Executor executor, SqlPlan dryRunSqlPlan, int sampleRowCount) { throw new UnsupportedOperationException("DryRun not supported!"); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index 69895ae78e6..e10ed0a00b2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -23,6 +23,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; +import org.finos.legend.engine.persistence.components.relational.api.DataError; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.executor.RelationalExecutionHelper; @@ -33,10 +34,7 @@ import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; -import java.util.Collections; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; public abstract class RelationalSink implements Sink { @@ -192,5 +190,5 @@ public interface ConstructDatasetFromDatabase public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues); - public abstract void performDryRun(Executor executor, SqlPlan dryRunSqlPlan); + public abstract List performDryRun(Executor executor, SqlPlan dryRunSqlPlan, int sampleRowCount); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java new file mode 100644 index 00000000000..f69e207bd00 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java @@ -0,0 +1,48 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.api; + +import org.immutables.value.Value; + +import java.util.Optional; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface DataErrorAbstract +{ + String errorMessage(); + + String file(); + + String errorCategory(); + + String columnName(); + + Optional lineNumber(); + + Optional characterPosition(); + + Optional rowNumber(); + + Optional rowStartLineNumber(); + + Optional rejectedRecord(); +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java index 4a88ebb2f7a..a234a15e855 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java @@ -17,7 +17,6 @@ import org.immutables.value.Value; import java.util.List; -import java.util.Map; @Value.Immutable @Value.Style( @@ -31,5 +30,5 @@ public abstract class DryRunResultAbstract { public abstract IngestStatus status(); - public abstract List> errorRecords(); + public abstract List errorRecords(); } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index d7e2fe07c01..f8751d73883 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -269,14 +269,17 @@ public SchemaEvolutionResult evolve() } /* - - Perform dry run of Ingestion - only supported for Bulk Load atm + - Perform dry run of Ingestion - only supported for Bulk Load */ - public void dryRun() + public DryRunResult dryRun() { LOGGER.info("Invoked dryRun method, will perform the dryRun"); validateDatasetsInitialization(); - // TODO invoke dry run + List dataErrors = performDryRun(); + IngestStatus ingestStatus = dataErrors.isEmpty() ? 
IngestStatus.SUCCEEDED : IngestStatus.FAILED; + DryRunResult dryRunResult = DryRunResult.builder().status(ingestStatus).addAllErrorRecords(dataErrors).build(); LOGGER.info("DryRun completed"); + return dryRunResult; } @@ -526,15 +529,15 @@ private List ingest(List dataSplitRanges, Schema } } - private void performDryRun() + private List performDryRun() { if (enrichedIngestMode instanceof BulkLoad) { - relationalSink().performDryRun(executor, generatorResult.ingestSqlPlan()); + return relationalSink().performDryRun(executor, generatorResult.dryRunSqlPlan(), sampleRowCount()); } else { - throw new RuntimeException("dry Run not supported for this ingest Mode : " + enrichedIngestMode.getClass().getSimpleName()); + throw new RuntimeException("Dry Run not supported for this ingest mode: " + enrichedIngestMode.getClass().getSimpleName()); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 96c0c14b2ab..83c5ba861eb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -41,6 +41,7 @@ import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; +import org.finos.legend.engine.persistence.components.relational.api.DataError; import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; @@ -246,25 +247,36 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } } - public void performDryRun(Executor executor, SqlPlan dryRunSqlPlan) + public List performDryRun(Executor executor, SqlPlan dryRunSqlPlan, int sampleRowCount) { - List results = executor.executePhysicalPlanAndGetResults(dryRunSqlPlan, 25); - List> resultSets = results.get(0).getData(); - for (Map row: resultSets) + if (dryRunSqlPlan == null || dryRunSqlPlan.getSqlList().isEmpty()) { - Object error = row.get(ERROR); - Object file = row.get(FILE_WITH_ERROR); - Object line = row.get(LINE); - Object character = row.get(CHARACTER); - Object byteOffset = row.get(BYTE_OFFSET); - Object category = row.get(CATEGORY); - Object columnName = row.get(COLUMN_NAME); - Object rowNumber = row.get(ROW_NUMBER); - Object rowStartLine = row.get(ROW_START_LINE); - Object rejectedRecord = row.get(REJECTED_RECORD); + throw new RuntimeException("DryRun not supported for this ingest"); } - } + List results = executor.executePhysicalPlanAndGetResults(dryRunSqlPlan, sampleRowCount); + List dataErrors = new ArrayList<>(); + if
(!results.isEmpty()) + { + List> resultSets = results.get(0).getData(); + for (Map row : resultSets) + { + DataError dataError = DataError.builder() + .errorMessage(getString(row, ERROR)) + .file(getString(row, FILE_WITH_ERROR)) + .errorCategory(getString(row, CATEGORY)) + .columnName(getString(row, COLUMN_NAME)) + .lineNumber(getLong(row, LINE)) + .characterPosition(getLong(row, CHARACTER)) + .rowNumber(getLong(row, ROW_NUMBER)) + .rowStartLineNumber(getLong(row, ROW_START_LINE)) + .rejectedRecord(getString(row, REJECTED_RECORD)) + .build(); + dataErrors.add(dataError); + } + } + return dataErrors; + } @Override public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) @@ -363,4 +375,17 @@ private String getErrorMessage(Map row) throw new RuntimeException(e); } } + + private String getString(Map row, String key) + { + Object value = row.get(key); + return value == null ? null : (String) value; + } + + private Long getLong(Map row, String key) + { + Object value = row.get(key); + return value == null ? null : (Long) value; + } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java index 751698e6ad2..5a93bfa70b8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; @@ -36,4 +37,18 @@ public interface SnowflakeStagedFilesDatasetPropertiesAbstract extends StagedFil Optional fileFormat(); Map copyOptions(); + + @Value.Derived + default boolean dryRunSupported() + { + // Only supported for CSV + boolean dryRunSupported = false; + if (fileFormat().isPresent() && fileFormat().get() instanceof StandardFileFormatAbstract) + { + StandardFileFormatAbstract standardFileFormatAbstract = (StandardFileFormatAbstract) fileFormat().get(); + dryRunSupported = standardFileFormatAbstract.formatType().equals(FileFormatType.CSV); + } + + return dryRunSupported; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java 
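The dryRunSupported() derivation above is what gates the planner changes from this patch: on Snowflake, dry run only activates for staged files declared with a standard CSV file format. A sketch of a qualifying dataset definition, assuming the project's usual Immutables builders (the location, pattern and delimiter values are illustrative, and putFormatOptions/addAllFilePatterns are assumed generated-builder names):

    SnowflakeStagedFilesDatasetProperties properties = SnowflakeStagedFilesDatasetProperties.builder()
        .location("my_location")
        .fileFormat(StandardFileFormat.builder()
            .formatType(FileFormatType.CSV)
            .putFormatOptions("FIELD_DELIMITER", ",")
            .build())
        .addAllFilePatterns(Collections.singletonList("(/path/xyz/file1.csv)|(/path/xyz/file2.csv)"))
        .build();
    // properties.dryRunSupported() == true; an AVRO format type or a named file format
    // yields false, so BulkLoadPlanner emits no dry-run pre-actions or COPY ... VALIDATION_MODE plan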
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 80d8cc42d10..d0db719fb61 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -192,9 +192,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); List preActionsSql = operations.preActionsSql(); - List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); - List dryRunSql = operations.dryRunSql(); List metaIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); @@ -212,19 +210,10 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() "'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + "PARSE_JSON('{\"event_id\":\"task123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}')," + "PARSE_JSON('{\"watermark\":\"my_watermark_value\"}'))"; - String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation\"" + - "(\"col_bigint\" BIGINT,\"col_variant\" VARIANT)"; - String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation\" FROM my_location " + - "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + - "FILE_FORMAT = (TYPE = 'AVRO') " + - "ON_ERROR = 'ABORT_STATEMENT' " + - "VALIDATION_MODE = 'RETURN_ERRORS'"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetaIngestSql, metaIngestSql.get(0)); - Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); - Assertions.assertEquals(expectedDryRunSql, dryRunSql.get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); @@ -266,9 +255,7 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); List preActionsSql = operations.preActionsSql(); - List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); - List dryRunSql = operations.dryRunSql(); List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); @@ -289,19 +276,9 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() "(SELECT 'MY_NAME',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME')," + "'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; - String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME_VALIDATION\"" + - "(\"COL_INT\" INTEGER,\"COL_INTEGER\" INTEGER)"; - String expectedDryRunSql = 
"COPY INTO \"MY_DB\".\"MY_NAME_VALIDATION\" FROM my_location " + - "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + - "FILE_FORMAT = (FORMAT_NAME = 'my_file_format') " + - "ON_ERROR = 'ABORT_STATEMENT' " + - "VALIDATION_MODE = 'RETURN_ERRORS'"; - Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); - Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); - Assertions.assertEquals(expectedDryRunSql, dryRunSql.get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); @@ -629,8 +606,18 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() "FILE_FORMAT = (FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'SKIP_FILE'"; + String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation\"" + + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; + String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation\" FROM my_location " + + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + + "ON_ERROR = 'SKIP_FILE' " + + "VALIDATION_MODE = 'RETURN_ERRORS'"; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals(expectedDryRunPreActionsSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunSql, operations.dryRunSql().get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); From a032d01bcf59a6f07c7a3e4b469e500fb0df8e48 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Tue, 20 Feb 2024 12:38:06 +0530 Subject: [PATCH 08/32] Fix the Data Error Definition --- .../relational/api/DataErrorAbstract.java | 2 +- .../relational/snowflake/SnowflakeSink.java | 53 +++++++++---------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java index f69e207bd00..1c41520ae06 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java @@ -34,7 +34,7 @@ public interface DataErrorAbstract String errorCategory(); - String columnName(); + Optional columnName(); Optional lineNumber(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 83c5ba861eb..ce7e85c5536 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -262,9 +262,9 @@ public List performDryRun(Executor exec for (Map row : resultSets) { DataError dataError = DataError.builder() - .errorMessage(getString(row, ERROR)) - .file(getString(row, FILE_WITH_ERROR)) - .errorCategory(getString(row, CATEGORY)) + .errorMessage(getString(row, ERROR).orElseThrow(IllegalStateException::new)) + .file(getString(row, FILE_WITH_ERROR).orElseThrow(IllegalStateException::new)) + .errorCategory(getString(row, CATEGORY).orElseThrow(IllegalStateException::new)) .columnName(getString(row, COLUMN_NAME)) .lineNumber(getLong(row, LINE)) .characterPosition(getLong(row, CHARACTER)) @@ -292,32 +292,32 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor row: resultSets) { - Object bulkLoadStatus = row.get(BULK_LOAD_STATUS); - Object filePath = row.get(FILE); - if (Objects.nonNull(bulkLoadStatus) && Objects.nonNull(filePath)) + Optional bulkLoadStatus = getString(row, BULK_LOAD_STATUS); + Optional filePath = getString(row, FILE); + if (bulkLoadStatus.isPresent() && filePath.isPresent()) { - if (bulkLoadStatus.equals(LOADED)) + if (bulkLoadStatus.get().equals(LOADED)) { totalFilesLoaded++; } else { // if partially loaded or load failed - dataFilePathsWithErrors.add(filePath.toString()); + dataFilePathsWithErrors.add(filePath.get()); errorMessages.add(getErrorMessage(row)); } } - Object rowsWithError = row.get(ERRORS_SEEN); - if (Objects.nonNull(rowsWithError)) + Optional rowsWithError = getLong(row, ERRORS_SEEN); + if (rowsWithError.isPresent()) { - totalRowsWithError += (Long) row.get(ERRORS_SEEN); + totalRowsWithError += rowsWithError.get(); } - Object rowsLoaded = row.get(ROWS_LOADED); - if (Objects.nonNull(rowsLoaded)) + Optional rowsLoaded = getLong(row, ROWS_LOADED); + if (rowsLoaded.isPresent()) { - totalRowsLoaded += (Long) row.get(ROWS_LOADED); + totalRowsLoaded += rowsLoaded.get(); } } @@ -354,16 +354,11 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor row) { Map errorInfoMap = new HashMap<>(); - Object filePath = row.get(FILE); - Object bulkLoadStatus = row.get(BULK_LOAD_STATUS); - Object errorsSeen = row.get(ERRORS_SEEN); - Object firstError = row.get(FIRST_ERROR); - Object firstErrorColumnName = row.get(FIRST_ERROR_COLUMN_NAME); - errorInfoMap.put(FILE, filePath); - errorInfoMap.put(BULK_LOAD_STATUS, bulkLoadStatus); - errorInfoMap.put(ERRORS_SEEN, errorsSeen); - errorInfoMap.put(FIRST_ERROR, firstError); - errorInfoMap.put(FIRST_ERROR_COLUMN_NAME, firstErrorColumnName); + errorInfoMap.put(FILE, row.get(FILE)); + errorInfoMap.put(BULK_LOAD_STATUS, row.get(BULK_LOAD_STATUS)); + errorInfoMap.put(ERRORS_SEEN, row.get(ERRORS_SEEN)); + errorInfoMap.put(FIRST_ERROR, row.get(FIRST_ERROR)); + errorInfoMap.put(FIRST_ERROR_COLUMN_NAME, row.get(FIRST_ERROR_COLUMN_NAME)); ObjectMapper objectMapper = new 
ObjectMapper(); try @@ -376,16 +371,18 @@ private String getErrorMessage(Map row) } } - private String getString(Map row, String key) + private Optional getString(Map row, String key) { Object value = row.get(key); - return value == null ? null : (String) value; + String strValue = value == null ? null : (String) value; + return Optional.ofNullable(strValue); } - private Long getLong(Map row, String key) + private Optional getLong(Map row, String key) { Object value = row.get(key); - return value == null ? null : (Long) value; + Long longValue = value == null ? null : (Long) value; + return Optional.ofNullable(longValue); } } From 666e16a01369686ce6e70aae317a8c9480562785 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Tue, 27 Feb 2024 09:46:31 +0530 Subject: [PATCH 09/32] Address code review comments --- ...va => DeriveDataErrorRowsLogicalPlan.java} | 6 ++-- ...ava => DeriveMaxDataErrorLogicalPlan.java} | 4 +-- .../components/planner/Planner.java | 4 +-- .../components/util/LogicalPlanUtils.java | 6 ---- .../components/util/TableNameGenUtils.java | 34 +++++++++++++++++++ .../bigquery/executor/BigQueryExecutor.java | 3 +- .../api/RelationalIngestorAbstract.java | 5 ++- ...eStagedFilesDatasetPropertiesAbstract.java | 6 ++-- .../schemaops/statements/CopyStatement.java | 2 +- 9 files changed, 50 insertions(+), 20 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/{DeriveDataErrorsLogicalPlan.java => DeriveDataErrorRowsLogicalPlan.java} (93%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/{DeriveDataErrorCheckLogicalPlan.java => DeriveMaxDataErrorLogicalPlan.java} (95%) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java similarity index 93% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java index 1ba88677201..836620188e0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorsLogicalPlan.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java @@ -1,4 +1,4 @@ -// Copyright 2023 Goldman Sachs +// Copyright 2024 Goldman Sachs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,14 +23,14 @@ import java.util.ArrayList; import java.util.List; -public class DeriveDataErrorsLogicalPlan implements VersioningStrategyVisitor +public class DeriveDataErrorRowsLogicalPlan implements VersioningStrategyVisitor { private List primaryKeys; private List remainingColumns; private Dataset tempStagingDataset; private int sampleRowCount; - public DeriveDataErrorsLogicalPlan(List primaryKeys, List remainingColumns, Dataset tempStagingDataset, int sampleRowCount) + public DeriveDataErrorRowsLogicalPlan(List primaryKeys, List remainingColumns, Dataset tempStagingDataset, int sampleRowCount) { this.primaryKeys = primaryKeys; this.remainingColumns = remainingColumns; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveMaxDataErrorLogicalPlan.java similarity index 95% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveMaxDataErrorLogicalPlan.java index 02afced5a4b..e5c1e2faf78 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveMaxDataErrorLogicalPlan.java @@ -25,14 +25,14 @@ import java.util.ArrayList; import java.util.List; -public class DeriveDataErrorCheckLogicalPlan implements VersioningStrategyVisitor +public class DeriveMaxDataErrorLogicalPlan implements VersioningStrategyVisitor { List primaryKeys; List remainingColumns; Dataset tempStagingDataset; - public DeriveDataErrorCheckLogicalPlan(List primaryKeys, List remainingColumns, Dataset tempStagingDataset) + public DeriveMaxDataErrorLogicalPlan(List primaryKeys, List remainingColumns, Dataset tempStagingDataset) { this.primaryKeys = primaryKeys; this.remainingColumns = remainingColumns; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index ba37e76e798..db078c7eadc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -446,13 +446,13 @@ protected void addDataErrorCheck(Map d List remainingColumns = getDigestOrRemainingColumns(); if (ingestMode.versioningStrategy().accept(VersioningVisitors.IS_TEMP_TABLE_NEEDED)) { - LogicalPlan logicalPlanForDataErrorCheck = ingestMode.versioningStrategy().accept(new DeriveDataErrorCheckLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset())); + LogicalPlan logicalPlanForDataErrorCheck = ingestMode.versioningStrategy().accept(new DeriveMaxDataErrorLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset())); if (logicalPlanForDataErrorCheck != null) { dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS, logicalPlanForDataErrorCheck); } - LogicalPlan logicalPlanForDataErrors = ingestMode.versioningStrategy().accept(new DeriveDataErrorsLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset(), options().sampleRowCount())); + LogicalPlan logicalPlanForDataErrors = ingestMode.versioningStrategy().accept(new DeriveDataErrorRowsLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset(), options().sampleRowCount())); if (logicalPlanForDataErrors != null) { dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS, logicalPlanForDataErrors); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index ba83a307734..b2c193d4b7b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -93,12 +93,6 @@ private LogicalPlanUtils() { } - public static String generateTableNameWithSuffix(String tableName, String suffix) - { - UUID uuid = UUID.randomUUID(); - return tableName + UNDERSCORE + suffix + UNDERSCORE + uuid; - } - public static Value INFINITE_BATCH_ID() { return InfiniteBatchIdValue.builder().build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java new file mode 100644 index 00000000000..3693b68f18f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java @@ -0,0 +1,34 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import java.util.UUID; + +import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.UNDERSCORE; + +public class TableNameGenUtils +{ + private static String generateTableSuffix() + { + UUID uuid = UUID.randomUUID(); + int uuidHashCode = Math.abs(uuid.hashCode()); + return UNDERSCORE + "LP" + UNDERSCORE + Integer.toString(uuidHashCode, 36); + } + + public static String generateTableName(String baseTableName, String suffix) + { + return baseTableName + UNDERSCORE + suffix + UNDERSCORE + generateTableSuffix(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java index 591b552b97f..bd89b59e866 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java @@ -96,8 +96,7 @@ public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan) @Override public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, int rows) { - // TODO to be implemented - return null; + throw new RuntimeException("Not implemented for Big Query"); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index f8751d73883..1d416f13eae 
100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -43,6 +43,7 @@ import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataUtils; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.TableNameGenUtils; import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; import org.finos.legend.engine.persistence.components.util.SqlLogging; import org.immutables.value.Value.Default; @@ -533,6 +534,7 @@ private List performDryRun() { if (enrichedIngestMode instanceof BulkLoad) { + executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); return relationalSink().performDryRun(executor, generatorResult.dryRunSqlPlan(), sampleRowCount()); } else @@ -662,6 +664,7 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) .putAllAdditionalMetadata(placeholderAdditionalMetadata) .bulkLoadEventIdValue(bulkLoadEventIdValue()) .batchSuccessStatusValue(batchSuccessStatusValue()) + .sampleRowCount(sampleRowCount()) .build(); planner = Planners.get(enrichedDatasets, enrichedIngestMode, generator.plannerOptions(), relationalSink().capabilities()); @@ -753,7 +756,7 @@ private Datasets importExternalDataset(Datasets datasets) DatasetReference mainDataSetReference = datasets.mainDataset().datasetReference(); externalDatasetReference = externalDatasetReference - .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : LogicalPlanUtils.generateTableNameWithSuffix(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING)) + .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : TableNameGenUtils.generateTableName(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING)) .withDatabase(externalDatasetReference.database().isPresent() ? externalDatasetReference.database().get() : mainDataSetReference.database().orElse(null)) .withGroup(externalDatasetReference.group().isPresent() ? externalDatasetReference.group().get() : mainDataSetReference.group().orElse(null)) .withAlias(externalDatasetReference.alias().isPresent() ? 
externalDatasetReference.alias().get() : mainDataSetReference.alias().orElseThrow(RuntimeException::new) + UNDERSCORE + STAGING); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java index 5a93bfa70b8..6627b0d6b53 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java @@ -43,10 +43,10 @@ default boolean dryRunSupported() { // Only supported for CSV boolean dryRunSupported = false; - if (fileFormat().isPresent() && fileFormat().get() instanceof StandardFileFormatAbstract) + if (fileFormat().isPresent() && fileFormat().get() instanceof StandardFileFormat) { - StandardFileFormatAbstract standardFileFormatAbstract = (StandardFileFormatAbstract) fileFormat().get(); - dryRunSupported = standardFileFormatAbstract.formatType().equals(FileFormatType.CSV); + StandardFileFormat standardFileFormat = (StandardFileFormat) fileFormat().get(); + dryRunSupported = standardFileFormat.formatType().equals(FileFormatType.CSV); } return dryRunSupported; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java index b2aa09a3f83..acb5f43a8d2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java @@ -223,7 +223,7 @@ void validate() throws SqlDomException { if (srcTable == null) { - throw new SqlDomException("selectStatement is mandatory for Copy Table Command"); + throw new SqlDomException("srcTable is mandatory for Copy Table Command"); } if (table == null) From 1cc22768389b016a8694b702d118ad5244a3a9bf Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 27 Feb 2024 16:48:00 +0800 Subject: [PATCH 10/32] Implement bulk load dry run generic 
approach --- .../logicalplan/LogicalPlanFactory.java | 11 + .../StagedFilesDatasetProperties.java | 2 +- .../logicalplan/operations/CopyAbstract.java | 2 +- .../values/MetadataFileNameFieldAbstract.java | 33 +++ .../MetadataRowNumberFieldAbstract.java | 30 +++ .../values/TryCastFunctionAbstract.java | 35 +++ .../components/planner/BulkLoadPlanner.java | 149 ++++++++++++- .../components/planner/Planner.java | 7 + .../components/util/Capability.java | 3 +- .../components/util/LogicalPlanUtils.java | 5 +- .../components/util/ValidationCategory.java | 33 +++ .../relational/ansi/AnsiSqlSink.java | 4 +- .../components/relational/RelationalSink.java | 38 +++- .../api/GeneratorResultAbstract.java | 16 ++ .../api/RelationalGeneratorAbstract.java | 16 ++ .../api/RelationalIngestorAbstract.java | 3 +- .../relational/sqldom/common/Clause.java | 3 + .../schemaops/values/WindowFunction.java | 11 +- .../sqldom/schemaops/WindowFunctionTest.java | 18 -- .../components/relational/h2/H2Sink.java | 87 ++++++++ .../visitor/MetadataFileNameFieldVisitor.java | 39 ++++ .../MetadataRowNumberFieldVisitor.java | 36 ++++ .../visitor/StagedFilesDatasetVisitor.java | 2 +- .../sql/visitor/TryCastFunctionVisitor.java | 49 +++++ .../sqldom/schemaops/values/CastFunction.java | 69 ++++++ .../ingestmode/bulkload/BulkLoadTest.java | 202 +++++++++++++++++- .../data/bulk-load/input/bad_file.csv | 3 + .../input/good_file_with_edge_case.csv | 3 + .../relational/snowflake/SnowflakeSink.java | 93 +++++--- ...eStagedFilesDatasetPropertiesAbstract.java | 8 +- .../visitor/MetadataFileNameFieldVisitor.java | 32 +++ .../MetadataRowNumberFieldVisitor.java | 32 +++ .../visitor/StagedFilesDatasetVisitor.java | 2 +- .../sql/visitor/TryCastFunctionVisitor.java | 49 +++++ .../values/MetadataFileNameColumn.java | 40 ++++ .../values/MetadataRowNumberColumn.java | 40 ++++ .../schemaops/values/TryCastFunction.java | 69 ++++++ .../components/ingestmode/BulkLoadTest.java | 16 +- 38 files changed, 1204 insertions(+), 86 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataFileNameFieldVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataRowNumberFieldVisitor.java create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/CastFunction.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/bad_file.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataFileNameFieldVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/TryCastFunctionVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataFileNameColumn.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/TryCastFunction.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java index 9199836cb2d..a20ddfd49be 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java @@ -15,6 +15,7 @@ package 
org.finos.legend.engine.persistence.components.logicalplan; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; @@ -116,4 +117,14 @@ public static LogicalPlan getLogicalPlanForMaxOfField(Dataset dataset, String fi .source(dataset).build(); return LogicalPlan.builder().addOps(selection).build(); } + + public static LogicalPlan getLogicalPlanForSelectAllFieldsWithStringFieldEquals(FieldValue field, String fieldValue) + { + Selection selection = Selection.builder() + .addFields(All.INSTANCE) + .source(field.datasetRef()) + .condition(Equals.of(field, StringValue.of(fieldValue))) + .build(); + return LogicalPlan.builder().addOps(selection).build(); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java index e15d8dc7623..b6270d52b60 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java @@ -25,7 +25,7 @@ public interface StagedFilesDatasetProperties List filePatterns(); @Value.Derived - default boolean dryRunSupported() + default boolean validationModeSupported() { return false; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java index 8f5a744bd67..27f62edbae0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java @@ -43,7 +43,7 @@ public interface CopyAbstract extends Operation StagedFilesDatasetProperties stagedFilesDatasetProperties(); @org.immutables.value.Value.Default - default boolean dryRun() + default boolean dryRun() // TODO: rename this to validationModeSupported { return false; } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java new file mode 100644 index 00000000000..95e1a9dff6d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java @@ -0,0 +1,33 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.logicalplan.values; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface MetadataFileNameFieldAbstract extends Value +{ + StagedFilesDatasetProperties stagedFilesDatasetProperties(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java new file mode 100644 index 00000000000..3e4e96d750c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java @@ -0,0 +1,30 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
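+//
+// [Editorial sketch, not part of the original patch] This value node is a
+// logical-plan placeholder for the "row number within the staged file"
+// metadata pseudo-column of the target sink. On Snowflake it is expected to
+// render as METADATA$FILE_ROW_NUMBER, alongside METADATA$FILENAME for the
+// file name field above; illustrative SQL only, with @my_stage a
+// hypothetical stage name:
+//
+//   SELECT t.$1, METADATA$FILENAME, METADATA$FILE_ROW_NUMBER FROM @my_stage t;
+//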
+ +package org.finos.legend.engine.persistence.components.logicalplan.values; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface MetadataRowNumberFieldAbstract extends Value +{ +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java new file mode 100644 index 00000000000..f64d0bbdd8a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java @@ -0,0 +1,35 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
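+//
+// [Editorial sketch, not part of the original patch] A TRY_CAST-style
+// function returns NULL instead of raising an error when a value cannot be
+// converted. The datatype-conversion dry-run validation added later in this
+// patch relies on exactly that: a row is flagged when the raw value is
+// non-null but its safe cast is null. Illustrative SQL, with
+// my_table_validation a hypothetical validation table:
+//
+//   SELECT * FROM my_table_validation
+//   WHERE ("amount" IS NOT NULL) AND (TRY_CAST("amount" AS INTEGER) IS NULL);
+//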
+ +package org.finos.legend.engine.persistence.components.logicalplan.values; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface TryCastFunctionAbstract extends Value +{ + Value field(); + + FieldType type(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 5d348b6f47b..0b690590ca5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -21,10 +21,16 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenerationHandler; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.IsNull; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Not; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Or; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.ExternalDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetAdditionalProperties; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.TableType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; @@ -34,6 +40,8 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.logicalplan.values.All; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; @@ -41,14 +49,17 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_DATASET_BASE_NAME; @@ -61,6 +72,9 @@ class BulkLoadPlanner extends Planner private Dataset externalDataset; private StagedFilesDataset stagedFilesDataset; + private static final String FILE = "FILE"; + private static final String ROW_NUMBER = "ROW_NUMBER"; + BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions, Set capabilities) { super(datasets, ingestMode, plannerOptions, capabilities); @@ -118,24 +132,117 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) @Override public LogicalPlan buildLogicalPlanForDryRun(Resources resources) { + if (!capabilities.contains(Capability.DRY_RUN)) + { + return LogicalPlan.of(Collections.emptyList()); + } + List operations = new ArrayList<>(); - if (capabilities.contains(Capability.DRY_RUN) && stagedFilesDataset.stagedFilesDatasetProperties().dryRunSupported()) + if (stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) { Dataset validationDataset = getValidationDataset(); Copy copy = Copy.builder() - .targetDataset(validationDataset) - .sourceDataset(stagedFilesDataset.datasetReference().withAlias("")) - .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()) - .dryRun(true) - .build(); + .targetDataset(validationDataset) + .sourceDataset(stagedFilesDataset.datasetReference().withAlias("")) + .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()) + .dryRun(true) + .build(); + operations.add(copy); + } + else + { + Dataset validationDataset = getValidationDatasetWithMetaColumns(); + + List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), true); + fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); + fieldsToSelect.add(MetadataRowNumberField.builder().build()); + + List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); + fieldsToInsert.add(FieldValue.builder().fieldName(FILE).datasetRef(stagingDataset().datasetReference()).build()); + fieldsToInsert.add(FieldValue.builder().fieldName(ROW_NUMBER).datasetRef(stagingDataset().datasetReference()).build()); + + Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build(); + + Copy copy = 
Copy.builder() + .targetDataset(validationDataset) + .sourceDataset(selectStage) + .addAllFields(fieldsToInsert) + .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()) + .dryRun(false) + .build(); operations.add(copy); } return LogicalPlan.of(operations); } + @Override + public Map<ValidationCategory, Map<Set<FieldValue>, LogicalPlan>> buildLogicalPlanForDryRunValidation(Resources resources) + { + if (!capabilities.contains(Capability.DRY_RUN) || stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) + { + return Collections.emptyMap(); + } + + Dataset validationDataset = getValidationDatasetWithMetaColumns(); + + Map<ValidationCategory, Map<Set<FieldValue>, LogicalPlan>> validationMap = new HashMap<>(); + List<Field> fieldsToCheckForNull = stagingDataset().schema().fields().stream().filter(field -> !field.nullable()).collect(Collectors.toList()); + List<Field> fieldsToCheckForDatatype = stagingDataset().schema().fields().stream().filter(field -> !DataType.isStringDatatype(field.type().dataType())).collect(Collectors.toList()); + + if (!fieldsToCheckForNull.isEmpty()) + { + Selection queryForNull = Selection.builder() + .source(validationDataset) + .condition(Or.of(fieldsToCheckForNull.stream().map(field -> + IsNull.of(FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build())) + .collect(Collectors.toList()))) + .build(); + + validationMap.put(ValidationCategory.NULL_VALUES, + Collections.singletonMap(fieldsToCheckForNull.stream().map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), + LogicalPlan.of(Collections.singletonList(queryForNull)))); + } + + if (!fieldsToCheckForDatatype.isEmpty()) + { + if (capabilities.contains(Capability.SAFE_CAST)) + { + Selection queryForDatatype = getSelectColumnsWithTryCast(validationDataset, fieldsToCheckForDatatype); + validationMap.put(ValidationCategory.DATATYPE_CONVERSION, + Collections.singletonMap(fieldsToCheckForDatatype.stream().map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), + LogicalPlan.of(Collections.singletonList(queryForDatatype)))); + } + else + { + validationMap.put(ValidationCategory.DATATYPE_CONVERSION, new HashMap<>()); + for (Field fieldToCheckForDatatype : fieldsToCheckForDatatype) + { + // TODO: change this to use cast - since we know at this point try cast is not possible + Selection queryForDatatype = getSelectColumnsWithTryCast(validationDataset, Collections.singletonList(fieldToCheckForDatatype)); + validationMap.get(ValidationCategory.DATATYPE_CONVERSION).put(Stream.of(fieldToCheckForDatatype).map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), + LogicalPlan.of(Collections.singletonList(queryForDatatype))); + } + } + } + + return validationMap; + } + + private Selection getSelectColumnsWithTryCast(Dataset dataset, List<Field> fieldsToCheckForDatatype) + { + return Selection.builder() + .source(dataset) + .condition(Or.of(fieldsToCheckForDatatype.stream().map(field -> And.builder() + .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build()))) + .addConditions(IsNull.of(TryCastFunction.builder().field(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build()).type(field.type()).build())) + .build()) + .collect(Collectors.toList()))) + .build(); + } + private 
LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) { - List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); + List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), false); List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); // Add digest @@ -207,9 +314,16 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources) { List operations = new ArrayList<>(); - if (capabilities.contains(Capability.DRY_RUN) && stagedFilesDataset.stagedFilesDatasetProperties().dryRunSupported()) + if (capabilities.contains(Capability.DRY_RUN)) { - operations.add(Create.of(true, getValidationDataset())); + if (stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) + { + operations.add(Create.of(true, getValidationDataset())); + } + else + { + operations.add(Create.of(true, getValidationDatasetWithMetaColumns())); + } } return LogicalPlan.of(operations); } @@ -298,4 +412,21 @@ private Dataset getValidationDataset() .datasetAdditionalProperties(DatasetAdditionalProperties.builder().tableType(TableType.TEMPORARY).build()) .build(); } + + private Dataset getValidationDatasetWithMetaColumns() + { + String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); + + List fields = stagedFilesDataset.schema().fields().stream().map(field -> field.withType(FieldType.builder().dataType(DataType.VARCHAR).build()).withNullable(true)).collect(Collectors.toList()); + fields.add(Field.builder().name(FILE).type(FieldType.builder().dataType(DataType.VARCHAR).build()).build()); + fields.add(Field.builder().name(ROW_NUMBER).type(FieldType.builder().dataType(DataType.BIGINT).build()).build()); + + return DatasetDefinition.builder() + .schema(stagedFilesDataset.schema().withFields(fields)) + .database(mainDataset().datasetReference().database()) + .group(mainDataset().datasetReference().group()) + .name(tableName + UNDERSCORE + "validation") // TODO legend_persistence + .datasetAdditionalProperties(DatasetAdditionalProperties.builder().tableType(TableType.TEMPORARY).build()) + .build(); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index db078c7eadc..db828953fe3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -44,6 +44,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.All; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import 
org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.ObjectValue; import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; @@ -53,6 +54,7 @@ import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataUtils; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import java.util.ArrayList; import java.util.List; @@ -268,6 +270,11 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) return LogicalPlan.of(Collections.emptyList()); } + public Map, LogicalPlan>> buildLogicalPlanForDryRunValidation(Resources resources) + { + return Collections.emptyMap(); + } + public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources) { return LogicalPlan.of(Collections.emptyList()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java index 4ec80de6b9e..9bd5c256417 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java @@ -23,5 +23,6 @@ public enum Capability DATA_TYPE_LENGTH_CHANGE, DATA_TYPE_SCALE_CHANGE, TRANSFORM_WHILE_COPY, - DRY_RUN; + DRY_RUN, + SAFE_CAST } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index b2c193d4b7b..2cc8b2c1b16 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -72,6 +72,7 @@ import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.FLOAT; import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.INT; import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.INTEGER; +import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.VARCHAR; import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_BULK_LOAD_EVENT_ID; import static 
org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_FILE_PATHS; import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_FILE_PATTERNS; @@ -395,7 +396,7 @@ public static List findCommonPrimaryFieldsBetweenMainAndStaging(Dataset m return stagingDataset.schema().fields().stream().filter(field -> field.primaryKey() && primaryKeysFromMain.contains(field.name())).collect(Collectors.toList()); } - public static List extractStagedFilesFieldValues(Dataset dataset) + public static List extractStagedFilesFieldValues(Dataset dataset, boolean withVarCharType) { List stagedFilesFields = new ArrayList<>(); boolean columnNumbersPresent = dataset.schema().fields().stream().allMatch(field -> field.columnNumber().isPresent()); @@ -407,7 +408,7 @@ public static List extractStagedFilesFieldValues(Dataset dataset) .datasetRefAlias(dataset.datasetReference().alias()) .alias(field.fieldAlias().isPresent() ? field.fieldAlias().get() : field.name()) .elementPath(field.elementPath()) - .fieldType(field.type()) + .fieldType(withVarCharType ? FieldType.builder().dataType(VARCHAR).build() : field.type()) .fieldName(field.name()) .build(); stagedFilesFields.add(fieldValue); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java new file mode 100644 index 00000000000..479808aba5f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java @@ -0,0 +1,33 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
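+//
+// [Editorial sketch, not part of the original patch] Each category pairs a
+// machine-readable name with the message surfaced on a DataError when its
+// validation query returns rows:
+//
+//   ValidationCategory category = ValidationCategory.NULL_VALUES;
+//   String message = category.getValidationFailedErrorMessage();
+//   // message == "Null values found in non-nullable column"
+//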
+ +package org.finos.legend.engine.persistence.components.util; + +public enum ValidationCategory +{ + NULL_VALUES("Null values found in non-nullable column"), + DATATYPE_CONVERSION("Unable to type cast column"); + + private final String validationFailedErrorMessage; + + ValidationCategory(String validationFailedErrorMessage) + { + this.validationFailedErrorMessage = validationFailedErrorMessage; + } + + public String getValidationFailedErrorMessage() + { + return this.validationFailedErrorMessage; + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index c4e6a054513..54f78f840c1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -154,8 +154,10 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.Transformer; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import java.util.*; @@ -327,7 +329,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor - public List<DataError> performDryRun(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, int sampleRowCount) + public List<DataError> performDryRun(Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) { throw new UnsupportedOperationException("DryRun not supported!"); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index e10ed0a00b2..fc790cd6cea 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -22,6 +22,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; 
import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.api.DataError; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; @@ -31,10 +32,13 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.sink.Sink; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.Transformer; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import java.util.*; +import java.util.stream.Collectors; public abstract class RelationalSink implements Sink { @@ -190,5 +194,37 @@ public interface ConstructDatasetFromDatabase public abstract IngestorResult performBulkLoad(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan ingestSqlPlan, Map<StatisticName, SqlPlan> statisticsSqlPlan, Map<String, PlaceholderValue> placeHolderKeyValues); - public abstract List<DataError> performDryRun(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, int sampleRowCount); + public abstract List<DataError> performDryRun(Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount); + + protected Optional<String> getString(Map<String, Object> row, String key) + { + Object value = row.get(key); + String strValue = value == null ? null : (String) value; + return Optional.ofNullable(strValue); + } + + protected Optional<Long> getLong(Map<String, Object> row, String key) + { + Object value = row.get(key); + Long longValue = value == null ? 
null : (Long) value; + return Optional.ofNullable(longValue); + } + + protected DataError constructDataError(Map<String, Object> row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) + { + String commaSeparatedRow = row.keySet().stream() + .sorted() + .filter(key -> !key.equals(fileNameColumnName) && !key.equals(rowNumberColumnName)) + .map(key -> getString(row, key).orElse("")) + .collect(Collectors.joining(",")); + + return DataError.builder() + .errorMessage(validationCategory.getValidationFailedErrorMessage()) + .file(getString(row, fileNameColumnName).orElseThrow(IllegalStateException::new)) + .errorCategory(validationCategory.name()) + .columnName(validatedColumnName) + .rowNumber(getLong(row, rowNumberColumnName)) + .rejectedRecord(commaSeparatedRow) + .build(); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 7b049f18b2b..250191dfae1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -17,9 +17,11 @@ import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.SqlPlanAbstract; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.immutables.value.Value.Immutable; import org.immutables.value.Value.Style; @@ -27,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; @Immutable @@ -57,6 +60,8 @@ public abstract class GeneratorResultAbstract public abstract SqlPlan dryRunSqlPlan(); + public abstract Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan(); + public abstract Optional ingestDataSplitRange(); public abstract SqlPlan metadataIngestSqlPlan(); @@ -113,6 +118,17 @@ public List<String> dryRunSql() return dryRunSqlPlan().getSqlList(); } + public Map<ValidationCategory, Map<Set<FieldValue>, String>> dryRunValidationSql() + { + return dryRunValidationSqlPlan().keySet().stream() + .collect(Collectors.toMap( + k -> k, + k -> dryRunValidationSqlPlan().get(k).keySet().stream().collect(Collectors.toMap( + k2 -> k2, + k2 -> dryRunValidationSqlPlan().get(k).get(k2).getSql() + )))); + } + public List<String> metadataIngestSql() { return metadataIngestSqlPlan().getSqlList(); diff --git
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index fa3da3f3d43..9a8ff64d076 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -21,6 +21,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.planner.Planner; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.planner.Planners; @@ -35,6 +36,7 @@ import org.finos.legend.engine.persistence.components.transformer.Transformer; import org.finos.legend.engine.persistence.components.util.MetadataUtils; import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.immutables.value.Value.Default; import org.immutables.value.Value.Derived; import org.immutables.value.Value.Immutable; @@ -287,6 +289,19 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan dryRunLogicalPlan = planner.buildLogicalPlanForDryRun(resources); SqlPlan dryRunSqlPlan = transformer.generatePhysicalPlan(dryRunLogicalPlan); + // dry-run validations + Map<ValidationCategory, Map<Set<FieldValue>, LogicalPlan>> dryRunValidationLogicalPlan = planner.buildLogicalPlanForDryRunValidation(resources); + Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan = new HashMap<>(); + for (ValidationCategory validationCategory : dryRunValidationLogicalPlan.keySet()) + { + dryRunValidationSqlPlan.put(validationCategory, new HashMap<>()); + for (Set<FieldValue> columns : dryRunValidationLogicalPlan.get(validationCategory).keySet()) + { + SqlPlan sqlPlan = transformer.generatePhysicalPlan(dryRunValidationLogicalPlan.get(validationCategory).get(columns)); + dryRunValidationSqlPlan.get(validationCategory).put(columns, sqlPlan); + } + } + // metadata ingest LogicalPlan metaDataIngestLogicalPlan = planner.buildLogicalPlanForMetadataIngest(resources); SqlPlan metaDataIngestSqlPlan = transformer.generatePhysicalPlan(metaDataIngestLogicalPlan); @@ -319,6 +334,7 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann .schemaEvolutionDataset(schemaEvolutionDataset) .ingestSqlPlan(ingestSqlPlan) .dryRunSqlPlan(dryRunSqlPlan) + .putAllDryRunValidationSqlPlan(dryRunValidationSqlPlan) .postActionsSqlPlan(postActionsSqlPlan) .postCleanupSqlPlan(postCleanupSqlPlan) .metadataIngestSqlPlan(metaDataIngestSqlPlan) diff --git
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 1d416f13eae..8993135ab33 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -535,7 +535,7 @@ private List<DataError> performDryRun() if (enrichedIngestMode instanceof BulkLoad) { executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); - return relationalSink().performDryRun(executor, generatorResult.dryRunSqlPlan(), sampleRowCount()); + return relationalSink().performDryRun(transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount()); } else { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java index fbbfba7b24c..9b9216e8b1d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java @@ -61,6 +61,9 @@ public enum Clause NOT_ENFORCED("NOT ENFORCED"), DATA_TYPE("DATA TYPE"), CONVERT("CONVERT"), + CAST("CAST"), + TRY_CAST("TRY_CAST"), + AS("AS"), ARRAY("ARRAY"), LOAD_DATA("LOAD DATA"), OVERWRITE("OVERWRITE"), diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java index 243caae8648..80208a83db1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java @@ -73,7 +73,6 @@ public void genSql(StringBuilder builder) throws SqlDomException @Override public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException { - validate(); function.genSqlWithoutAlias(builder); builder.append(WHITE_SPACE); builder.append(OVER); @@ -81,7 +80,7 @@ public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException builder.append(OPEN_PARENTHESIS); // Add Partition By - if (partitionByFields != null) + if (partitionByFields != null && !partitionByFields.isEmpty()) { builder.append(PARTITION_BY.get() + WHITE_SPACE); for (int ctr = 0; ctr < partitionByFields.size(); ctr++) @@ -122,12 +121,4 @@ else if (node instanceof Field) partitionByFields.add((Field) node); } } - - void validate() throws SqlDomException - { - if ((partitionByFields == null || partitionByFields.isEmpty()) && (orderByFields == null || orderByFields.isEmpty())) - { - throw new SqlDomException("Both partitionByFields and orderByFields are empty"); - } - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java index 9835faeaba9..7e4c0659ade 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java @@ -21,7 +21,6 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Function; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.OrderedField; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.WindowFunction; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.Arrays; @@ -32,23 +31,6 @@ public class WindowFunctionTest { - - @Test - void testBothPartitionAndOrderByFieldsMissing() - { - Function rowNumber = new Function(FunctionName.ROW_NUMBER, null, BaseTest.QUOTE_IDENTIFIER); - WindowFunction windowFunction = new WindowFunction(BaseTest.QUOTE_IDENTIFIER, rowNumber, null, null); - try - { - String sql = BaseTest.genSql(windowFunction); - Assertions.fail("Should have thrown Exception"); - } - catch (Exception e) - { - assertEquals("Both partitionByFields and orderByFields are empty", e.getMessage()); - } - } - @Test void testWithPartitionFields() { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index d2a986eaffd..dffa2c3744f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -14,11 +14,14 @@ package org.finos.legend.engine.persistence.components.relational.h2; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; @@ -29,9 +32,13 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.LoadCsv; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.HashFunction; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -39,6 +46,7 @@ import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.LowerCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; +import org.finos.legend.engine.persistence.components.relational.api.DataError; import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; @@ -49,6 +57,8 @@ import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.DigestUdfVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.HashFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.LoadCsvVisitor; 
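+// Dry-run flow for H2 (a sketch inferred from performDryRun further down): the +// dry-run plan first loads the staged CSV, per-category validation queries then +// flag the bad rows, and the MetadataFileNameField/MetadataRowNumberField values +// imported below let those queries report the offending file and row number.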
+import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.MetadataFileNameFieldVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.MetadataRowNumberFieldVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.SchemaDefinitionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ParseJsonFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.FieldVisitor; @@ -57,8 +67,10 @@ import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesSelectionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ToArrayFunctionVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.TryCastFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.Transformer; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper; @@ -67,6 +79,7 @@ import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.relational.executor.RelationalExecutor; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import java.sql.Connection; import java.sql.DriverManager; @@ -77,9 +90,12 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.DATATYPE_CONVERSION; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUES; public class H2Sink extends AnsiSqlSink { @@ -90,6 +106,9 @@ public class H2Sink extends AnsiSqlSink private static final Map<DataType, Set<DataType>> IMPLICIT_DATA_TYPE_MAPPING; private static final Map<DataType, Set<DataType>> EXPLICIT_DATA_TYPE_MAPPING; + private static final String FILE = "FILE"; + private static final String ROW_NUMBER = "ROW_NUMBER"; + static { Set<Capability> capabilities = new HashSet<>(); @@ -100,6 +119,7 @@ public class H2Sink extends AnsiSqlSink capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.DATA_TYPE_SCALE_CHANGE); capabilities.add(Capability.TRANSFORM_WHILE_COPY); + capabilities.add(Capability.DRY_RUN); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map<Class<?>, LogicalPlanVisitor<?>> logicalPlanVisitorByClass = new HashMap<>(); @@ -116,6 +136,13 @@ public class H2Sink extends AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); logicalPlanVisitorByClass.put(ToArrayFunction.class, new ToArrayFunctionVisitor()); + // Dry-run visitors: H2 lacks a native TRY_CAST, so TryCastFunction renders as a + // plain CAST whose conversion failures are caught in performDryRun below; + // MetadataFileNameField renders as the staged file path literal and + // MetadataRowNumberField as ROW_NUMBER() OVER (). + 
logicalPlanVisitorByClass.put(TryCastFunction.class, new TryCastFunctionVisitor()); + logicalPlanVisitorByClass.put(MetadataFileNameField.class, new MetadataFileNameFieldVisitor()); + logicalPlanVisitorByClass.put(MetadataRowNumberField.class, new MetadataRowNumberFieldVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); Map<DataType, Set<DataType>> implicitDataTypeMapping = new HashMap<>(); @@ -236,4 +259,72 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor + + @Override + public List<DataError> performDryRun(Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + { + executor.executePhysicalPlan(dryRunSqlPlan); + + List<DataError> dataErrors = new ArrayList<>(); + + Map<Set<FieldValue>, SqlPlan> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUES, new HashMap<>()); + Map<Set<FieldValue>, SqlPlan> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(DATATYPE_CONVERSION, new HashMap<>()); + + // Execute queries for null values + for (Set<FieldValue> validatedColumns : queriesForNull.keySet()) + { + List<TabularData> results = executor.executePhysicalPlanAndGetResults(queriesForNull.get(validatedColumns), sampleRowCount); + if (!results.isEmpty()) + { + List<Map<String, Object>> resultSets = results.get(0).getData(); + for (Map<String, Object> row : resultSets) + { + for (String column : validatedColumns.stream().map(FieldValue::fieldName).collect(Collectors.toSet())) + { + if (row.get(column) == null) + { + dataErrors.add(constructDataError(row, FILE, ROW_NUMBER, NULL_VALUES, column)); + } + } + } + } + } + + // Execute queries for datatype conversion + for (Set<FieldValue> validatedColumns : queriesForDatatype.keySet()) + { + try + { + executor.executePhysicalPlanAndGetResults(queriesForDatatype.get(validatedColumns), sampleRowCount); + } + catch (RuntimeException e) + { + // Assumes the executor wraps the JDBC failure, so getCause() is H2's + // JdbcSQLDataException, whose message embeds the offending literal; strip the + // known prefixes and the trailing "; SQL statement ..." suffix to recover it, + // then re-select the sampled rows that contain that literal. + String errorMessage = e.getCause().getMessage(); + String problematicValue = errorMessage.substring(0, errorMessage.indexOf("; SQL statement")); + problematicValue = problematicValue.replace("org.h2.jdbc.JdbcSQLDataException: Data conversion error converting ", ""); + problematicValue = problematicValue.replace("org.h2.jdbc.JdbcSQLDataException: Cannot parse \"TIMESTAMP\" constant ", ""); + problematicValue = problematicValue.replaceAll("\"", ""); + + // This loop will only be executed once as there is always only one element in the set + for (FieldValue validatedColumn : validatedColumns) + { + List<TabularData> results = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(LogicalPlanFactory.getLogicalPlanForSelectAllFieldsWithStringFieldEquals(validatedColumn, problematicValue)), sampleRowCount); + if (!results.isEmpty()) + { + List<Map<String, Object>> resultSets = results.get(0).getData(); + for (Map<String, Object> row : resultSets) + { + dataErrors.add(constructDataError(row, FILE, ROW_NUMBER, DATATYPE_CONVERSION, validatedColumn.fieldName())); + } + } + } + + } + } + + return dataErrors; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataFileNameFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataFileNameFieldVisitor.java new file mode 100644 index 00000000000..12dc58dc423 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataFileNameFieldVisitor.java @@ -0,0 +1,40 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.StringValueVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataFileNameFieldVisitor implements LogicalPlanVisitor<MetadataFileNameField> +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataFileNameField current, VisitorContext context) + { + if (!(current.stagedFilesDatasetProperties() instanceof H2StagedFilesDatasetProperties)) + { + throw new IllegalStateException("Only H2StagedFilesDatasetProperties are supported for H2 Sink"); + } + H2StagedFilesDatasetProperties datasetProperties = (H2StagedFilesDatasetProperties) current.stagedFilesDatasetProperties(); + + // Assumes a single staged file per load; the tests always pass exactly one path. + StringValue stringValue = StringValue.of(datasetProperties.filePaths().get(0)); + return new StringValueVisitor().visit(prev, stringValue, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataRowNumberFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataRowNumberFieldVisitor.java new file mode 100644 index 00000000000..173f509dbe7 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataRowNumberFieldVisitor.java @@ -0,0 +1,38 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; +import org.finos.legend.engine.persistence.components.logicalplan.values.WindowFunction; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.WindowFunctionVisitor; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataRowNumberFieldVisitor implements LogicalPlanVisitor<MetadataRowNumberField> +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataRowNumberField current, VisitorContext context) + { + // No PARTITION BY or ORDER BY: this renders as ROW_NUMBER() OVER (), which is + // why the empty-window validation was dropped from sqldom's WindowFunction. + WindowFunction windowFunction = WindowFunction.builder() + .windowFunction(FunctionImpl.builder().functionName(FunctionName.ROW_NUMBER).build()) + .build(); + return new WindowFunctionVisitor().visit(prev, windowFunction, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java index 068721db03f..810b9028b6a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java @@ -29,7 +29,7 @@ public class StagedFilesDatasetVisitor implements LogicalPlanVisitor - List<Value> allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); + List<Value> allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current, false); StagedFilesSelection selection = StagedFilesSelection.builder() .source(current) .addAllFields(allColumns) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java new file 
mode 100644 index 00000000000..46538b40475 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java @@ -0,0 +1,49 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; +import org.finos.legend.engine.persistence.components.optimizer.Optimizer; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class TryCastFunctionVisitor implements LogicalPlanVisitor<TryCastFunction> +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, TryCastFunction current, VisitorContext context) + { + DataType dataType = new H2DataTypeMapping().getDataType(current.type()); + + CastFunction castFunction = new CastFunction(dataType, context.quoteIdentifier()); + for (Optimizer optimizer : context.optimizers()) + { + castFunction = (CastFunction) optimizer.optimize(castFunction); + } + prev.push(castFunction); + + List<LogicalPlanNode> logicalPlanNodeList = new ArrayList<>(); + logicalPlanNodeList.add(current.field()); + + return new VisitorResult(castFunction, logicalPlanNodeList); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/CastFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/CastFunction.java new file mode 100644 index 00000000000..b636e658ca4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/CastFunction.java @@ -0,0 +1,69 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class CastFunction extends Value +{ + private Value column; + private DataType dataType; + + public CastFunction(DataType dataType, String quoteIdentifier) + { + super(quoteIdentifier); + this.dataType = dataType; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append(Clause.CAST.get()); + builder.append(OPEN_PARENTHESIS); + column.genSqlWithoutAlias(builder); + builder.append(WHITE_SPACE); + builder.append(Clause.AS.get()); + builder.append(WHITE_SPACE); + dataType.genSql(builder); + builder.append(CLOSING_PARENTHESIS); + } + + @Override + public void push(Object node) + { + if (node instanceof Value) + { + column = (Value) node; + } + else if (node instanceof DataType) + { + dataType = (DataType) node; + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index b2d40e72e31..40584fdf69a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -35,12 +35,17 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.CaseConversion; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import 
org.finos.legend.engine.persistence.components.relational.api.DryRunResult; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; import org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -50,6 +55,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; @@ -89,6 +95,16 @@ public class BulkLoadTest extends BaseTest .name(COL_DATETIME) .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) .build(); + private static Field col2NonNullable = Field.builder() + .name(COL_STRING) + .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) + .nullable(false) + .build(); + private static Field col3NonNullable = Field.builder() + .name(COL_DECIMAL) + .type(FieldType.of(DataType.DECIMAL, 5, 2)) + .nullable(false) + .build(); protected final ZonedDateTime fixedZonedDateTime_2000_01_01 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); protected final Clock fixedClock_2000_01_01 = Clock.fixed(fixedZonedDateTime_2000_01_01.toInstant(), ZoneOffset.UTC); @@ -131,9 +147,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoBulkLoadEventId() th GeneratorResult operations = generator.generateOperations(datasets); List<String> preActionsSql = operations.preActionsSql(); - List<String> dryRunPreActionsSql = operations.dryRunPreActionsSql(); List<String> ingestSql = operations.ingestSql(); - List<String> dryRunSql = operations.dryRunSql(); Map<StatisticName, String> statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + @@ -148,8 +162,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoBulkLoadEventId() th Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); - Assertions.assertEquals(0, dryRunPreActionsSql.size()); - Assertions.assertEquals(0, dryRunSql.size()); + // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); @@ -740,6 +753,187 @@ public void testBulkLoadNotCsvFile() } } + @Test + public void testBulkLoadDryRunSuccess() + { + String filePath = "src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + 
.stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List<String> preActionsSql = operations.preActionsSql(); + List<String> ingestSql = operations.ingestSql(); + Map<StatisticName, String> statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER,\"append_time\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' FROM CSVREAD('src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv'," + + "'col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // TODO: convert these to assertions + System.out.println(operations.dryRunPreActionsSql()); + System.out.println(operations.dryRunSql()); + System.out.println(operations.dryRunValidationSql()); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.empty()); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + Assertions.assertEquals(IngestStatus.SUCCEEDED, dryRunResult.status()); + Assertions.assertTrue(dryRunResult.errorRecords().isEmpty()); + } + + @Test + public void testBulkLoadDryRunFailure() + { + String filePath = "src/test/resources/data/bulk-load/input/bad_file.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + 
.schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2NonNullable, col3NonNullable, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List<String> preActionsSql = operations.preActionsSql(); + List<String> ingestSql = operations.ingestSql(); + Map<StatisticName, String> statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER,\"col_string\" VARCHAR NOT NULL,\"col_decimal\" DECIMAL(5,2) NOT NULL,\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER,\"append_time\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv'," + + "'col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // TODO: convert these to assertions + System.out.println(operations.dryRunPreActionsSql()); + System.out.println(operations.dryRunSql()); + System.out.println(operations.dryRunValidationSql()); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.empty()); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + List<DataError> expectedErrorRecords = Arrays.asList(DataError.builder() + .file(filePath) + .errorCategory(ValidationCategory.NULL_VALUES.name()) + .rowNumber(1L) + .columnName(col3NonNullable.name()) + .rejectedRecord("2022-01-99 00:00:00.0,,??,Andy") + .errorMessage("Null values found in non-nullable column") + .build(), DataError.builder() + .file(filePath) + .errorCategory(ValidationCategory.NULL_VALUES.name()) + .rowNumber(2L) + .columnName(col2NonNullable.name()) + .rejectedRecord("2022-01-12 00:00:00.0,NaN,2,") + .errorMessage("Null values found in non-nullable column") + .build(), DataError.builder() + .file(filePath) + .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .rowNumber(1L) + .columnName(col1.name()) + .rejectedRecord("2022-01-99 00:00:00.0,,??,Andy") + .errorMessage("Unable to type cast column") + .build(), DataError.builder() + .file(filePath) + 
.errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .rowNumber(1L) + .columnName(col4.name()) + .rejectedRecord("2022-01-99 00:00:00.0,,??,Andy") + .errorMessage("Unable to type cast column") + .build(), DataError.builder() + .file(filePath) + .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .rowNumber(2L) + .columnName(col3.name()) + .rejectedRecord("2022-01-12 00:00:00.0,NaN,2,") + .errorMessage("Unable to type cast column") + .build()); + + Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); + Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); + } + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, Optional<String> eventId) { return RelationalIngestor.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/bad_file.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/bad_file.csv new file mode 100644 index 00000000000..d39a87c98ed --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/bad_file.csv @@ -0,0 +1,3 @@ +??,Andy,,2022-01-99 00:00:00.0 +2,,NaN,2022-01-12 00:00:00.0 +,Success,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv new file mode 100644 index 00000000000..f06cdcc3da8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv @@ -0,0 +1,3 @@ +1,,5.20,2022-01-11 00:00:00.0 +2,123456789123456789123456789,99.99,2022-01-12 00:00:00.0 +, \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index ce7e85c5536..2ad57571f21 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -35,7 +35,11 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Show; import 
org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -55,6 +59,8 @@ import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.AlterVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.BatchEndTimestampVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.ClusterKeyVisitor; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.MetadataFileNameFieldVisitor; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.MetadataRowNumberFieldVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.SQLCreateVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.SchemaDefinitionVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.FieldVisitor; @@ -66,13 +72,16 @@ import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.DigestUdfVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesSelectionVisitor; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.TryCastFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.Transformer; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -88,8 +97,8 @@ import java.util.Optional; import java.util.Properties; import java.util.Set; -import java.util.Objects; import java.util.ArrayList; +import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; @@ -133,6 +142,7 @@ public class SnowflakeSink extends AnsiSqlSink 
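+// SAFE_CAST (added below) advertises Snowflake's native TRY_CAST to the planner; +// judging by validationModeSupported() further down, explicit validation queries +// are the fall-back when COPY ... VALIDATION_MODE is unavailable (it is CSV-only), +// and performDryRun chooses between the two paths at runtime.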
capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.TRANSFORM_WHILE_COPY); capabilities.add(Capability.DRY_RUN); + capabilities.add(Capability.SAFE_CAST); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map<Class<?>, LogicalPlanVisitor<?>> logicalPlanVisitorByClass = new HashMap<>(); @@ -150,6 +160,9 @@ public class SnowflakeSink extends AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); logicalPlanVisitorByClass.put(StagedFilesSelection.class, new StagedFilesSelectionVisitor()); logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); + logicalPlanVisitorByClass.put(TryCastFunction.class, new TryCastFunctionVisitor()); + logicalPlanVisitorByClass.put(MetadataFileNameField.class, new MetadataFileNameFieldVisitor()); + logicalPlanVisitorByClass.put(MetadataRowNumberField.class, new MetadataRowNumberFieldVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); @@ -247,12 +260,20 @@ public Optional<Optimizer> optimizerForCaseConversion(CaseConversion caseConvers } } - public List<DataError> performDryRun(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, int sampleRowCount) + public List<DataError> performDryRun(Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) { - if (dryRunSqlPlan == null || dryRunSqlPlan.getSqlList().isEmpty()) + if (dryRunValidationSqlPlan == null || dryRunValidationSqlPlan.isEmpty()) { - throw new RuntimeException("DryRun supported for this ingest"); + return performDryRunWithValidationMode(executor, dryRunSqlPlan, sampleRowCount); } + else + { + return performDryRunWithValidationQueries(executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount); + } + } + + private List<DataError> performDryRunWithValidationMode(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, int sampleRowCount) + { List<TabularData> results = executor.executePhysicalPlanAndGetResults(dryRunSqlPlan, sampleRowCount); List<DataError> dataErrors = new ArrayList<>(); @@ -262,22 +283,53 @@ public List<DataError> performDryRun(Executor<SqlGen, TabularData, SqlPlan> exec for (Map<String, Object> row : resultSets) { DataError dataError = DataError.builder() - .errorMessage(getString(row, ERROR).orElseThrow(IllegalStateException::new)) - .file(getString(row, FILE_WITH_ERROR).orElseThrow(IllegalStateException::new)) - .errorCategory(getString(row, CATEGORY).orElseThrow(IllegalStateException::new)) - .columnName(getString(row, COLUMN_NAME)) - .lineNumber(getLong(row, LINE)) - .characterPosition(getLong(row, CHARACTER)) - .rowNumber(getLong(row, ROW_NUMBER)) - .rowStartLineNumber(getLong(row, ROW_START_LINE)) - .rejectedRecord(getString(row, REJECTED_RECORD)) - .build(); + .errorMessage(getString(row, ERROR).orElseThrow(IllegalStateException::new)) + .file(getString(row, FILE_WITH_ERROR).orElseThrow(IllegalStateException::new)) + .errorCategory(getString(row, CATEGORY).orElseThrow(IllegalStateException::new)) + .columnName(getString(row, COLUMN_NAME)) + .lineNumber(getLong(row, LINE)) + .characterPosition(getLong(row, CHARACTER)) + .rowNumber(getLong(row, ROW_NUMBER)) + .rowStartLineNumber(getLong(row, ROW_START_LINE)) + .rejectedRecord(getString(row, REJECTED_RECORD)) + .build(); dataErrors.add(dataError); } } return dataErrors; } + private List<DataError> performDryRunWithValidationQueries(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + { + executor.executePhysicalPlan(dryRunSqlPlan); + + List<DataError> dataErrors = new ArrayList<>(); + for (ValidationCategory validationCategory : 
dryRunValidationSqlPlan.keySet())
+        {
+            for (Set validatedColumns : dryRunValidationSqlPlan.get(validationCategory).keySet())
+            {
+                List results = executor.executePhysicalPlanAndGetResults(dryRunValidationSqlPlan.get(validationCategory).get(validatedColumns), sampleRowCount);
+                if (!results.isEmpty())
+                {
+                    List> resultSets = results.get(0).getData();
+                    for (Map row : resultSets)
+                    {
+                        for (String column : validatedColumns.stream().map(FieldValue::fieldName).collect(Collectors.toSet()))
+                        {
+                            if (row.get(column) == null)
+                            {
+                                dataErrors.add(constructDataError(row, FILE_WITH_ERROR, ROW_NUMBER, validationCategory, column));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        return dataErrors;
+    }
+
     @Override
     public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues)
     {
@@ -370,19 +420,4 @@ private String getErrorMessage(Map row)
             throw new RuntimeException(e);
         }
     }
-
-    private Optional getString(Map row, String key)
-    {
-        Object value = row.get(key);
-        String strValue = value == null ? null : (String) value;
-        return Optional.ofNullable(strValue);
-    }
-
-    private Optional getLong(Map row, String key)
-    {
-        Object value = row.get(key);
-        Long longValue = value == null ? null : (Long) value;
-        return Optional.ofNullable(longValue);
-    }
-
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java
index 6627b0d6b53..7ff6bda8d92 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java
@@ -39,16 +39,16 @@ public interface SnowflakeStagedFilesDatasetPropertiesAbstract extends StagedFil
     Map copyOptions();
 
     @Value.Derived
-    default boolean dryRunSupported()
+    default boolean validationModeSupported()
     {
         // Only supported for CSV
-        boolean dryRunSupported = false;
+        boolean validationModeSupported = false;
         if (fileFormat().isPresent() && fileFormat().get() instanceof StandardFileFormat)
         {
             StandardFileFormat standardFileFormat = (StandardFileFormat) fileFormat().get();
-            dryRunSupported = standardFileFormat.formatType().equals(FileFormatType.CSV);
+            validationModeSupported = standardFileFormat.formatType().equals(FileFormatType.CSV);
         }
-        return dryRunSupported;
+        return validationModeSupported;
     }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataFileNameFieldVisitor.java
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataFileNameFieldVisitor.java new file mode 100644 index 00000000000..f1f24e10a9c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataFileNameFieldVisitor.java @@ -0,0 +1,32 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.MetadataFileNameColumn; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataFileNameFieldVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataFileNameField current, VisitorContext context) + { + MetadataFileNameColumn fileNameColumn = new MetadataFileNameColumn(context.quoteIdentifier()); + prev.push(fileNameColumn); + return new VisitorResult(null); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java new file mode 100644 index 00000000000..4d41cdda875 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java @@ -0,0 +1,32 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.MetadataRowNumberColumn; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataRowNumberFieldVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataRowNumberField current, VisitorContext context) + { + MetadataRowNumberColumn rowNumberColumn = new MetadataRowNumberColumn(context.quoteIdentifier()); + prev.push(rowNumberColumn); + return new VisitorResult(null); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java index c22d0b6ed44..76378584c4e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java @@ -29,7 +29,7 @@ public class StagedFilesDatasetVisitor implements LogicalPlanVisitor allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); + List allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current, false); StagedFilesSelection selection = StagedFilesSelection.builder() .source(current) .addAllFields(allColumns) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/TryCastFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/TryCastFunctionVisitor.java new file mode 100644 index 00000000000..aabbf31ae6c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/TryCastFunctionVisitor.java @@ -0,0 +1,49 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; +import org.finos.legend.engine.persistence.components.optimizer.Optimizer; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.SnowflakeDataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class TryCastFunctionVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, TryCastFunction current, VisitorContext context) + { + DataType dataType = new SnowflakeDataTypeMapping().getDataType(current.type()); + + org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.TryCastFunction tryCastFunction = + new org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.TryCastFunction(dataType, context.quoteIdentifier()); + for (Optimizer optimizer : context.optimizers()) + { + tryCastFunction = (org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.TryCastFunction) optimizer.optimize(tryCastFunction); + } + prev.push(tryCastFunction); + + List logicalPlanNodeList = new ArrayList<>(); + logicalPlanNodeList.add(current.field()); + + return new VisitorResult(tryCastFunction, logicalPlanNodeList); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataFileNameColumn.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataFileNameColumn.java new file mode 100644 index 00000000000..faf1731bb98 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataFileNameColumn.java @@ -0,0 +1,40 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +public class MetadataFileNameColumn extends Value +{ + + public MetadataFileNameColumn(String quoteIdentifier) + { + super(quoteIdentifier); + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append("METADATA$FILENAME"); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java new file mode 100644 index 00000000000..dedb1a1e63f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java @@ -0,0 +1,40 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
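+
+// Renders Snowflake's METADATA$FILE_ROW_NUMBER metadata column, i.e. the row number of
+// each record within its staged file, so that a dry-run data error can point at the
+// exact row of the exact file that failed validation.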
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +public class MetadataRowNumberColumn extends Value +{ + + public MetadataRowNumberColumn(String quoteIdentifier) + { + super(quoteIdentifier); + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append("METADATA$FILE_ROW_NUMBER"); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/TryCastFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/TryCastFunction.java new file mode 100644 index 00000000000..b26b223f6ec --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/TryCastFunction.java @@ -0,0 +1,69 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
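+
+// Renders Snowflake's TRY_CAST, e.g. TRY_CAST("col_bigint" AS BIGINT) (assuming
+// Clause.TRY_CAST prints as TRY_CAST). TRY_CAST yields NULL instead of an error when a
+// value cannot be converted, which is what lets the dry-run validation queries detect
+// bad rows by checking the casted columns for NULL.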
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class TryCastFunction extends Value +{ + private Value column; + private DataType dataType; + + public TryCastFunction(DataType dataType, String quoteIdentifier) + { + super(quoteIdentifier); + this.dataType = dataType; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append(Clause.TRY_CAST); + builder.append(OPEN_PARENTHESIS); + column.genSqlWithoutAlias(builder); + builder.append(WHITE_SPACE); + builder.append(Clause.AS); + builder.append(WHITE_SPACE); + dataType.genSql(builder); + builder.append(CLOSING_PARENTHESIS); + } + + @Override + public void push(Object node) + { + if (node instanceof Value) + { + column = (Value) node; + } + else if (node instanceof DataType) + { + dataType = (DataType) node; + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index d0db719fb61..f6d8f395e7d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -72,6 +72,13 @@ public class BulkLoadTest .columnNumber(5) .build(); + private static Field col3NonNullable = Field.builder() + .name("col_bigint") + .type(FieldType.of(DataType.BIGINT, Optional.empty(), Optional.empty())) + .columnNumber(4) + .nullable(false) + .build(); + private List filesList = Arrays.asList("/path/xyz/file1.csv", "/path/xyz/file2.csv"); protected final ZonedDateTime fixedZonedDateTime_2000_01_01 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); @@ -170,7 +177,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .location("my_location") .fileFormat(StandardFileFormat.builder().formatType(FileFormatType.AVRO).build()) .addAllFilePaths(filesList).build()) - .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col3, col4)).build()) + 
.schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col3NonNullable, col4)).build()) .alias("t") .build(); @@ -196,7 +203,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() List metaIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT,\"col_variant\" VARIANT,\"batch_id\" INTEGER)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT NOT NULL,\"col_variant\" VARIANT,\"batch_id\" INTEGER)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + "(\"col_bigint\", \"col_variant\", \"batch_id\") " + "FROM " + @@ -215,6 +222,11 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetaIngestSql, metaIngestSql.get(0)); + // TODO: convert these to assertions + System.out.println(operations.dryRunPreActionsSql()); + System.out.println(operations.dryRunSql()); + System.out.println(operations.dryRunValidationSql()); + Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); Assertions.assertNull(statsSql.get(ROWS_TERMINATED)); From b2e43a580dfb4e8cbbb9eb3694afc5c0a54003b0 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Wed, 28 Feb 2024 12:35:20 +0530 Subject: [PATCH 11/32] Changes for Adding hash to temp tables --- .../components/planner/BulkLoadPlanner.java | 13 ++- .../components/planner/Planner.java | 9 +- .../components/util/LogicalPlanUtils.java | 10 +- .../components/util/TableNameGenUtils.java | 22 +++-- .../components/AnsiTestArtifacts.java | 60 ++++++------ .../nontemporal/AppendOnlyTest.java | 18 ++-- .../nontemporal/NontemporalDeltaTest.java | 82 ++++++++-------- .../nontemporal/NontemporalSnapshotTest.java | 8 +- .../UnitemporalDeltaBatchIdBasedTest.java | 38 ++++---- ...temporalDeltaBatchIdDateTimeBasedTest.java | 14 +-- .../UnitemporalDeltaDateTimeBasedTest.java | 16 +-- .../UnitemporalSnapshotBatchIdBasedTest.java | 4 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 8 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 4 +- .../util/TableNameGenUtilsTest.java | 30 ++++++ .../components/ingestmode/AppendOnlyTest.java | 23 ++--- .../ingestmode/BigQueryTestArtifacts.java | 44 ++++----- .../components/ingestmode/BulkLoadTest.java | 26 +++-- .../ingestmode/NontemporalDeltaTest.java | 14 +-- .../ingestmode/NontemporalSnapshotTest.java | 4 +- .../UnitemporalDeltaBatchIdBasedTest.java | 30 +++--- ...temporalDeltaBatchIdDateTimeBasedTest.java | 10 +- .../UnitemporalDeltaDateTimeBasedTest.java | 12 +-- .../UnitemporalSnapshotBatchIdBasedTest.java | 4 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 8 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 4 +- .../api/RelationalGeneratorAbstract.java | 8 ++ .../api/RelationalIngestorAbstract.java | 27 +++--- .../nontemporal/AppendOnlyTest.java | 3 +- .../versioning/TestDedupAndVersioning.java | 97 ++++++++++--------- .../components/ingestmode/AppendOnlyTest.java | 18 ++-- .../ingestmode/MemsqlTestArtifacts.java | 40 ++++---- .../ingestmode/NontemporalDeltaTest.java | 24 ++--- .../ingestmode/NontemporalSnapshotTest.java | 8 +- .../UnitemporalDeltaBatchIdBasedTest.java | 30 +++--- ...temporalDeltaBatchIdDateTimeBasedTest.java | 10 +- .../UnitemporalDeltaDateTimeBasedTest.java | 12 +-- .../UnitemporalSnapshotBatchIdBasedTest.java | 4 +- 
...poralSnapshotBatchIdDateTimeBasedTest.java | 8 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 4 +- .../components/SnowflakeTestArtifacts.java | 4 +- .../components/ingestmode/AppendOnlyTest.java | 5 +- .../components/ingestmode/BulkLoadTest.java | 18 +++- .../ingestmode/NontemporalDeltaMergeTest.java | 10 +- .../persistence/components/BaseTest.java | 1 + .../nontemporal/AppendOnlyTestCases.java | 8 ++ .../NontemporalDeltaTestCases.java | 5 + .../NontemporalSnapshotTestCases.java | 2 + ...nitmemporalDeltaBatchIdBasedTestCases.java | 6 ++ ...ralDeltaBatchIdDateTimeBasedTestCases.java | 2 + ...itmemporalDeltaDateTimeBasedTestCases.java | 2 + ...memporalSnapshotBatchIdBasedTestCases.java | 1 + ...SnapshotBatchIdDateTimeBasedTestCases.java | 2 + ...emporalSnapshotDateTimeBasedTestCases.java | 1 + 54 files changed, 489 insertions(+), 386 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtilsTest.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 5d348b6f47b..cf6c740ad17 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -46,13 +46,14 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; +import org.finos.legend.engine.persistence.components.util.TableNameGenUtils; import java.util.*; import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; -import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_DATASET_BASE_NAME; -import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.UNDERSCORE; +import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_DATASET_ALIAS; +import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_DATASET_QUALIFIER; class BulkLoadPlanner extends Planner { @@ -77,12 +78,13 @@ class BulkLoadPlanner extends Planner transformWhileCopy = capabilities.contains(Capability.TRANSFORM_WHILE_COPY); if (!transformWhileCopy) { + String externalDatasetName = TableNameGenUtils.generateTableName(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)), TEMP_DATASET_QUALIFIER, options().ingestRunId()); externalDataset = ExternalDataset.builder() .stagedFilesDataset(stagedFilesDataset) .database(datasets.mainDataset().datasetReference().database()) .group(datasets.mainDataset().datasetReference().group()) - 
.name(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)) + UNDERSCORE + TEMP_DATASET_BASE_NAME) - .alias(TEMP_DATASET_BASE_NAME) + .name(externalDatasetName) + .alias(TEMP_DATASET_ALIAS) .build(); } } @@ -290,11 +292,12 @@ protected void addPostRunStatsForRowsDeleted(Map pos private Dataset getValidationDataset() { String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); + String validationDatasetName = TableNameGenUtils.generateTableName(tableName, "validation", options().ingestRunId()); return DatasetDefinition.builder() .schema(stagedFilesDataset.schema()) .database(mainDataset().datasetReference().database()) .group(mainDataset().datasetReference().group()) - .name(tableName + UNDERSCORE + "validation") // TODO legend_persistence + .name(validationDatasetName) .datasetAdditionalProperties(DatasetAdditionalProperties.builder().tableType(TableType.TEMPORARY).build()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index db078c7eadc..c601921f0fa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -61,6 +61,7 @@ import java.util.Set; import java.util.Collections; import java.util.HashMap; +import java.util.UUID; import static org.finos.legend.engine.persistence.components.common.StatisticName.INCOMING_RECORD_COUNT; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_DELETED; @@ -131,6 +132,12 @@ default int sampleRowCount() { return 20; } + + @Default + default String ingestRunId() + { + return UUID.randomUUID().toString(); + } } private final Datasets datasets; @@ -179,7 +186,7 @@ private Optional getTempStagingDataset() Optional tempStagingDataset = Optional.empty(); if (isTempTableNeededForStaging) { - tempStagingDataset = Optional.of(LogicalPlanUtils.getTempStagingDatasetDefinition(originalStagingDataset(), ingestMode)); + tempStagingDataset = Optional.of(LogicalPlanUtils.getTempStagingDatasetDefinition(originalStagingDataset(), ingestMode, options().ingestRunId())); } return tempStagingDataset; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index b2c193d4b7b..cb001c3ceeb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -62,7 +62,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.UUID; import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.BIGINT; @@ -76,6 +75,8 @@ import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_FILE_PATHS; import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_FILE_PATTERNS; import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_STAGING_FILTERS; +import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_STAGING_DATASET_ALIAS; +import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_STAGING_DATASET_QUALIFIER; public class LogicalPlanUtils @@ -86,7 +87,6 @@ public class LogicalPlanUtils public static final String DATA_SPLIT_UPPER_BOUND_PLACEHOLDER = "{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}"; public static final String UNDERSCORE = "_"; public static final String TEMP_DATASET_BASE_NAME = "legend_persistence_temp"; - public static final String TEMP_STAGING_DATASET_BASE_NAME = "legend_persistence_temp_staging"; public static final String TEMP_DATASET_WITH_DELETE_INDICATOR_BASE_NAME = "legend_persistence_tempWithDeleteIndicator"; private LogicalPlanUtils() @@ -449,10 +449,10 @@ public static Dataset getTempDatasetWithDeleteIndicator(Datasets datasets, Strin } } - public static Dataset getTempStagingDatasetDefinition(Dataset stagingDataset, IngestMode ingestMode) + public static Dataset getTempStagingDatasetDefinition(Dataset stagingDataset, IngestMode ingestMode, String ingestRunId) { - String alias = stagingDataset.datasetReference().alias().orElse(TEMP_STAGING_DATASET_BASE_NAME); - String datasetName = stagingDataset.datasetReference().name().orElseThrow(IllegalStateException::new) + UNDERSCORE + TEMP_STAGING_DATASET_BASE_NAME; + String alias = stagingDataset.datasetReference().alias().orElse(TEMP_STAGING_DATASET_ALIAS); + String datasetName = TableNameGenUtils.generateTableName(stagingDataset.datasetReference().name().orElseThrow(IllegalStateException::new), TEMP_STAGING_DATASET_QUALIFIER, ingestRunId); SchemaDefinition tempStagingSchema = ingestMode.versioningStrategy().accept(new DeriveTempStagingSchemaDefinition(stagingDataset.schema(), ingestMode.deduplicationStrategy())); return DatasetDefinition.builder() .schema(tempStagingSchema) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java index 3693b68f18f..27911dab26e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java
@@ -14,21 +14,27 @@
 package org.finos.legend.engine.persistence.components.util;
 
-import java.util.UUID;
-
 import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.UNDERSCORE;
 
 public class TableNameGenUtils
 {
-    private static String generateTableSuffix()
+    public static String LEGEND_PERSISTENCE_MARKER = "lp";
+    public static final String TEMP_DATASET_QUALIFIER = "temp";
+    public static final String TEMP_DATASET_ALIAS = "legend_persistence_temp";
+    public static final String TEMP_STAGING_DATASET_QUALIFIER = "temp_staging";
+    public static final String TEMP_STAGING_DATASET_ALIAS = "legend_persistence_temp_staging";
+
+    private static String generateTableSuffix(String ingestRunId)
     {
-        UUID uuid = UUID.randomUUID();
-        int uuidHashCode = Math.abs(uuid.hashCode());
-        return UNDERSCORE + "LP" + UNDERSCORE + Integer.toString(uuidHashCode, 36);
+        int hashCode = Math.abs(ingestRunId.hashCode());
+        return LEGEND_PERSISTENCE_MARKER + UNDERSCORE + Integer.toString(hashCode, 36);
    }
 
-    public static String generateTableName(String baseTableName, String suffix)
+    /*
+        Table name = <base_table_name>_<qualifier>_lp_<base-36 hash of ingestRunId>, e.g. staging_temp_staging_lp_yosulf
+     */
+    public static String generateTableName(String baseTableName, String qualifier, String ingestRunId)
     {
-        return baseTableName + UNDERSCORE + suffix + UNDERSCORE + generateTableSuffix();
+        return baseTableName + UNDERSCORE + qualifier + UNDERSCORE + generateTableSuffix(ingestRunId);
     }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java
index 700bc1aa1b7..36297acf02e 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java
@@ -221,21 +221,21 @@ public static String getDropTempTableQuery(String tableName)
         "\"batch_id\" INTEGER," +
         "PRIMARY KEY (\"id\", \"name\", \"batch_update_time\"))";
 
-    public static String expectedBaseTempStagingTablePlusDigest = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" +
+    public static String expectedBaseTempStagingTablePlusDigest = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" +
         "(\"id\" INTEGER NOT NULL," +
         "\"name\" VARCHAR NOT NULL," +
         "\"amount\" DOUBLE," +
         "\"biz_date\" DATE," +
         "\"digest\" VARCHAR)";
 
-    public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" +
+    public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" +
         "(\"id\" INTEGER NOT NULL," +
         "\"name\" VARCHAR NOT NULL," +
         "\"amount\" DOUBLE," +
         "\"biz_date\" DATE," +
         "\"legend_persistence_count\" INTEGER)";
 
-    public static String
expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -244,7 +244,7 @@ public static String getDropTempTableQuery(String tableName) "\"version\" INTEGER," + "\"legend_persistence_count\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -252,7 +252,7 @@ public static String getDropTempTableQuery(String tableName) "\"digest\" VARCHAR," + "\"legend_persistence_count\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithCountUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\"" + + public static String expectedBaseTempStagingTablePlusDigestWithCountUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\"" + "(\"ID\" INTEGER NOT NULL," + "\"NAME\" VARCHAR NOT NULL," + "\"AMOUNT\" DOUBLE," + @@ -260,7 +260,7 @@ public static String getDropTempTableQuery(String tableName) "\"DIGEST\" VARCHAR," + "\"LEGEND_PERSISTENCE_COUNT\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithVersionUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\"" + + public static String expectedBaseTempStagingTablePlusDigestWithVersionUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\"" + "(\"ID\" INTEGER NOT NULL," + "\"NAME\" VARCHAR NOT NULL," + "\"AMOUNT\" DOUBLE," + @@ -268,7 +268,7 @@ public static String getDropTempTableQuery(String tableName) "\"DIGEST\" VARCHAR," + "\"VERSION\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"(" + + public static String expectedBaseTempStagingTablePlusDigestWithDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"(" + "\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -276,7 +276,7 @@ public static String getDropTempTableQuery(String tableName) "\"digest\" VARCHAR," + "\"data_split\" INTEGER NOT NULL)"; - public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -285,7 +285,7 @@ public static String getDropTempTableQuery(String tableName) "\"legend_persistence_count\" INTEGER," + "\"data_split\" INTEGER NOT NULL)"; - public static String expectedBaseTempStagingTablePlusDigestWithDataSplitAndCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTablePlusDigestWithDataSplitAndCount = "CREATE TABLE IF NOT EXISTS 
\"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -494,8 +494,8 @@ public static String getDropTempTableQuery(String tableName) "PRIMARY KEY (\"id\", \"name\", \"validity_from_reference\"))"; public static String expectedStagingCleanupQuery = "DELETE FROM \"mydb\".\"staging\" as stage"; - public static String expectedTempStagingCleanupQuery = "DELETE FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage"; - public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage"; + public static String expectedTempStagingCleanupQuery = "DELETE FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage"; + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage"; public static String expectedDropTableQuery = "DROP TABLE IF EXISTS \"mydb\".\"staging\" CASCADE"; public static String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata " + @@ -535,7 +535,7 @@ public static String getDropTempTableQuery(String tableName) "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'SUCCEEDED'," + "PARSE_JSON('{\"watermark\":\"my_watermark_value\"}'))"; - public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER " + @@ -544,13 +544,13 @@ public static String getDropTempTableQuery(String tableName) "\"mydb\".\"staging\" as stage GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\") as stage) as stage " + "WHERE stage.\"legend_persistence_rank\" = 1)"; - public static String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\")"; - public static String expectedInsertIntoBaseTempStagingWithFilterDupsAndMaxVersion = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingWithFilterDupsAndMaxVersion = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as " + "\"legend_persistence_count\" FROM (SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + @@ -559,13 +559,13 @@ public static String getDropTempTableQuery(String tableName) "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\") as stage) as stage WHERE stage.\"legend_persistence_rank\" = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\")"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER " + @@ -574,7 +574,7 @@ public static String getDropTempTableQuery(String tableName) "\"mydb\".\"staging\" as stage GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\") as stage) as stage " + "WHERE stage.\"legend_persistence_rank\" = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",DENSE_RANK() " + @@ -582,7 +582,7 @@ public static String getDropTempTableQuery(String tableName) "FROM \"mydb\".\"staging\" as stage) as stage WHERE stage.\"legend_persistence_rank\" = 1)"; public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicatesUpperCase = "INSERT INTO " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"LEGEND_PERSISTENCE_COUNT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"LEGEND_PERSISTENCE_COUNT\" as \"LEGEND_PERSISTENCE_COUNT\" FROM " + "(SELECT 
stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"LEGEND_PERSISTENCE_COUNT\" as \"LEGEND_PERSISTENCE_COUNT\",DENSE_RANK() " + @@ -590,50 +590,50 @@ public static String getDropTempTableQuery(String tableName) "FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",COUNT(*) as \"LEGEND_PERSISTENCE_COUNT\" FROM \"MYDB\".\"STAGING\" as stage " + "GROUP BY stage.\"ID\", stage.\"NAME\", stage.\"AMOUNT\", stage.\"BIZ_DATE\", stage.\"DIGEST\") as stage) as stage WHERE stage.\"LEGEND_PERSISTENCE_RANK\" = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\", \"data_split\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"biz_date\" ASC) as \"data_split\" " + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\") as stage)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndAllowDups = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndAllowDups = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"data_split\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"biz_date\" ASC) as \"data_split\" " + "FROM \"mydb\".\"staging\" as stage)"; public static String maxDupsErrorCheckSql = "SELECT MAX(stage.\"legend_persistence_count\") as \"MAX_DUPLICATES\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage"; - public static String dupRowsSql = "SELECT \"id\",\"name\",\"legend_persistence_count\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + public static String dupRowsSql = "SELECT \"id\",\"name\",\"legend_persistence_count\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE stage.\"legend_persistence_count\" > 1 LIMIT 20"; public static String dataErrorCheckSqlWithBizDateVersion = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " + "(SELECT COUNT(DISTINCT(\"digest\")) as \"legend_persistence_distinct_rows\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; public static String dataErrorCheckSql = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " + "(SELECT COUNT(DISTINCT(\"digest\")) as \"legend_persistence_distinct_rows\" FROM " + - 
"\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"version\") as stage"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"version\") as stage"; public static String dataErrorCheckSqlUpperCase = "SELECT MAX(\"LEGEND_PERSISTENCE_DISTINCT_ROWS\") as \"MAX_DATA_ERRORS\" FROM" + " (SELECT COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_DISTINCT_ROWS\" FROM " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage GROUP BY \"ID\", \"NAME\", \"VERSION\") as stage"; + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage GROUP BY \"ID\", \"NAME\", \"VERSION\") as stage"; public static String dataErrorCheckSqlWithBizDateAsVersionUpperCase = "SELECT MAX(\"LEGEND_PERSISTENCE_DISTINCT_ROWS\") as \"MAX_DATA_ERRORS\" " + "FROM (SELECT COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_DISTINCT_ROWS\" FROM " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\") as stage"; + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\") as stage"; public static String dataErrorsSqlWithBizDateVersion = "SELECT \"id\",\"name\",\"biz_date\",COUNT(DISTINCT(\"digest\")) as \"legend_persistence_error_count\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; - public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT \"ID\",\"NAME\",\"BIZ_DATE\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT \"ID\",\"NAME\",\"BIZ_DATE\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 20"; - public static String dataErrorsSqlUpperCase = "SELECT \"ID\",\"NAME\",\"VERSION\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + public static String dataErrorsSqlUpperCase = "SELECT \"ID\",\"NAME\",\"VERSION\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "as stage GROUP BY \"ID\", \"NAME\", \"VERSION\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 10"; public static String dataErrorsSql = "SELECT \"id\",\"name\",\"version\",COUNT(DISTINCT(\"digest\")) as \"legend_persistence_error_count\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"version\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"version\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 535aaeb2d4a..f37d536e0eb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -76,7 +76,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); @@ -93,7 +93,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), generatorResults.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -112,7 +112,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterEx List deduplicationAndVersioningSql = queries.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -146,7 +146,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_number\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + @@ -166,7 +166,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithAdditionalMetadata(), operations.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_number\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -185,7 +185,7 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_UPDATE_TIME\", \"BATCH_ID\") " + - "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') 
FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (EXISTS " + "(SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" = stage.\"DIGEST\"))))"; @@ -200,7 +200,7 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -217,7 +217,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExis List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -250,7 +250,7 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage)"; 
Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSqlList.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index 942cf2cbd93..ef0ddbd563d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -36,10 +36,10 @@ public class NontemporalDeltaTest extends NontemporalDeltaTestCases protected String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; protected String incomingRecordCountWithSplits = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE " + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - protected String incomingRecordCountWithSplitsTempStagingTable = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + protected String incomingRecordCountWithSplitsTempStagingTable = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE " + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String incomingRecordCountWithSplitsWithDuplicates = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE " + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; @@ -106,20 +106,20 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage 
WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "SET sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + "sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000'," + "sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))"; + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; @@ -143,19 +143,19 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) { String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> 
stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + "sink.\"batch_id\" = (SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE " + "(((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) " + "AND ((stage.\"data_split\" >= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -222,18 +222,18 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) 
AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + "sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000'," + "sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000'," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 
'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -442,22 +442,22 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"version\" = (SELECT stage.\"version\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"version\" = (SELECT stage.\"version\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + "sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM 
\"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))"; + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" " + @@ -565,18 +565,18 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE \"MYDB\".\"MAIN\" as sink " + - "SET sink.\"ID\" = (SELECT stage.\"ID\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"NAME\" = (SELECT stage.\"NAME\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"AMOUNT\" = (SELECT stage.\"AMOUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"BIZ_DATE\" = (SELECT stage.\"BIZ_DATE\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"DIGEST\" = (SELECT stage.\"DIGEST\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"VERSION\" = (SELECT stage.\"VERSION\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "SET sink.\"ID\" = (SELECT stage.\"ID\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"NAME\" = (SELECT stage.\"NAME\" FROM 
\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"AMOUNT\" = (SELECT stage.\"AMOUNT\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"BIZ_DATE\" = (SELECT stage.\"BIZ_DATE\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"DIGEST\" = (SELECT stage.\"DIGEST\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"VERSION\" = (SELECT stage.\"VERSION\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + "sink.\"BATCH_ID\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))"; + "WHERE EXISTS (SELECT * FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))"; String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\", \"BATCH_ID\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + - "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\"))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase, preActionsSqlList.get(0)); @@ -586,7 +586,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithUpperCase(), operations.metadataIngestSql().get(0)); - String insertTempStagingTable = "INSERT INTO \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + String insertTempStagingTable = "INSERT INTO \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" FROM " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index d867324e8c0..10c44d84bc7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -87,7 +87,7 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato "(\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSqlList.get(1)); @@ -116,14 +116,14 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe "(\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage)"; String maxDataErrorCheckSql = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " + - "(SELECT COUNT(DISTINCT(\"amount\")) as \"legend_persistence_distinct_rows\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(SELECT COUNT(DISTINCT(\"amount\")) as \"legend_persistence_distinct_rows\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; String dataErrorsSql = "SELECT \"id\",\"name\",\"biz_date\",COUNT(DISTINCT(\"amount\")) as \"legend_persistence_error_count\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSqlList.get(1)); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java index ea11abec0c9..efd8c2e04e2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java @@ -134,7 +134,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE " + "(sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -142,7 +142,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "999999999 FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))) AND " + @@ -154,7 +154,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); - String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"delete_indicator\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"delete_indicator\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + @@ -179,7 +179,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND 
(stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -187,7 +187,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + @@ -205,7 +205,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as \"rowsInserted\""; @@ -338,14 +338,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 " + "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as 
batch_metadata " + - "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "as stage WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -353,7 +353,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + @@ -385,14 +385,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 " + "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "as stage WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -400,7 +400,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", 
\"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + @@ -469,7 +469,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(stage.\"version\" > sink.\"version\")))"; @@ -478,7 +478,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + "999999999 " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -487,7 +487,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage GROUP BY stage.\"id\", " + @@ -512,14 +512,14 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink " + "SET sink.\"BATCH_ID_OUT\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1 WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE " + + "(EXISTS (SELECT * FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE " + "((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")))"; String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," 
+ "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999 FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999 FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND " + "(stage.\"VERSION\" < sink.\"VERSION\") AND ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")))))"; @@ -527,7 +527,7 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithVersionUpperCase, preActionsSql.get(2)); - String expectedInsertIntoTempStagingMaxVersion = "INSERT INTO \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + String expectedInsertIntoTempStagingMaxVersion = "INSERT INTO \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" " + "FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 3f27df023f1..68fc66e0996 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -79,7 +79,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"digest\" <> stage.\"digest\")))"; @@ -88,7 +88,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + @@ -106,7 +106,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(1).metadataIngestSql().get(0)); Assertions.assertEquals(2, operations.size()); - String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"data_split\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"data_split\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + @@ -116,7 +116,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithFilterDuplicates, operations.get(0).deduplicationAndVersioningSql().get(1)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) as \"rowsInserted\""; @@ -210,7 +210,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + 
"((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -218,7 +218,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(sink.\"digest\" = stage.\"digest\") AND ((sink.\"id\" = stage.\"id\") AND " + @@ -227,7 +227,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"digest\" <> stage.\"digest\")))"; @@ -86,7 +86,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + @@ -104,7 +104,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(1).metadataIngestSql().get(0)); Assertions.assertEquals(2, operations.size()); - String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"data_split\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"data_split\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + @@ -116,7 +116,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( Assertions.assertEquals(dupRowsSql, operations.get(0).deduplicationAndVersioningErrorChecksSql().get(DUPLICATE_ROWS)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000'"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" 
as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') as \"rowsInserted\""; @@ -172,7 +172,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -180,7 +180,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND (sink.\"digest\" = stage.\"digest\") " + @@ -190,7 +190,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsInserted\""; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java index 9933902880b..9ce111bf20f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java @@ -94,13 +94,13 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat "SET sink.\"batch_id_out\" = (SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 2dafcb4408a..b345447dea8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -79,13 +79,13 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -130,13 +130,13 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = " + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE " + "UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1,sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00.000000' " + - "WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (NOT (EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (NOT (EXISTS (SELECT * FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" = stage.\"DIGEST\"))))"; String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\"," + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (stage.\"DIGEST\" IN (SELECT sink.\"DIGEST\" FROM \"MYDB\".\"MAIN\" as sink WHERE sink.\"BATCH_ID_OUT\" = 999999999)))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java index a0b04f4d4b4..258bee99dfa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java @@ -86,14 +86,14 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + "AND (NOT (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '9999-12-31 23:59:59')))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtilsTest.java new file mode 100644 index 00000000000..3ff7f0ede46 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtilsTest.java @@ -0,0 +1,30 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TableNameGenUtilsTest +{ + @Test + public void testTableNameGen() + { + String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; + String expectedTableName = "person_temp_lp_yosulf"; + String tableName = TableNameGenUtils.generateTableName("person", "temp", ingestRunId); + Assertions.assertEquals(expectedTableName, tableName); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index 92d9869b210..54136d68f7e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -74,7 +74,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); @@ -91,7 +91,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(2, generatorResults.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -112,7 +112,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterEx String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, 
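Note on the naming scheme this test pins down: temp table names are now derived from the base table name, a purpose suffix, and a token computed deterministically from the ingestRunId, giving the <base>_<suffix>_lp_<token> shape. With the fixed run id 075605e3-bada-47d7-9ae9-7138f392fe22 used throughout these tests, the token is "yosulf", which is why expected SQL strings below can hard-code names like staging_temp_staging_lp_yosulf. The sketch that follows illustrates one way such a generator can work; only the name layout is taken from the test above, while the hash-to-letters encoding is an assumption for illustration and need not match this patch's actual TableNameGenUtils.

    // Illustrative sketch only; the encoding below is assumed, not the patch's real code,
    // so it will not necessarily reproduce the exact "yosulf" token.
    public final class TableNameGenSketch
    {
        public static String generateTableName(String baseName, String suffix, String ingestRunId)
        {
            return baseName + "_" + suffix + "_lp_" + token(ingestRunId);
        }

        // Folds the run id's hash into six lowercase letters: the same run id always
        // yields the same table name, while different runs rarely collide.
        private static String token(String ingestRunId)
        {
            long h = Integer.toUnsignedLong(ingestRunId.hashCode());
            StringBuilder sb = new StringBuilder(6);
            for (int i = 0; i < 6; i++)
            {
                sb.append((char) ('a' + (int) (h % 26)));
                h /= 26;
            }
            return sb.toString();
        }
    }

Because every generator in these tests is built with the same ingestRunId, the derived token is stable across all expected SQL, unlike the per-run names produced in production.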
`amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -147,7 +147,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_number`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -167,7 +167,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_number` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -188,7 +188,7 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_UPDATE_TIME`, `BATCH_ID`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN') " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS " + "(SELECT * FROM `MYDB`.`MAIN` as sink WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; @@ -205,7 +205,7 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult 
operations) String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -224,7 +224,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExis String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -259,7 +259,7 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1)); @@ -327,6 +327,7 @@ public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisting .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); @@ -340,7 +341,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,LAKEHOUSE_MD5(TO_JSON(STRUCT(stage.`name`,stage.`biz_date`))),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= 
'{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); @@ -357,7 +358,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableIngestQuery, generatorResults.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java index 34328f91db4..27b0bc59f63 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java @@ -56,14 +56,14 @@ public class BigQueryTestArtifacts "`digest` STRING," + "PRIMARY KEY (`id`, `name`) NOT ENFORCED)"; - public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + "`biz_date` DATE," + "`legend_persistence_count` INT64)"; - public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -72,7 +72,7 @@ public class BigQueryTestArtifacts "`version` INT64," + "`legend_persistence_count` INT64)"; - public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS 
`mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -80,7 +80,7 @@ public class BigQueryTestArtifacts "`digest` STRING," + "`legend_persistence_count` INT64)"; - public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -89,7 +89,7 @@ public class BigQueryTestArtifacts "`legend_persistence_count` INT64," + "`data_split` INT64 NOT NULL)"; - public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -190,7 +190,7 @@ public class BigQueryTestArtifacts public static String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage WHERE 1 = 1"; - public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE 1 = 1"; + public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE 1 = 1"; public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging`"; @@ -489,7 +489,7 @@ public class BigQueryTestArtifacts "`delete_indicator` STRING," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`) NOT ENFORCED)"; - public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`," + @@ -498,13 +498,13 @@ public class BigQueryTestArtifacts "FROM `mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage) " + "as stage WHERE stage.`legend_persistence_rank` = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String 
expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + @@ -513,31 +513,31 @@ public class BigQueryTestArtifacts "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " + "WHERE stage.`legend_persistence_rank` = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, `data_split`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; - public static String expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`, `data_split`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage)"; public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage"; public static String dataErrorCheckSqlForBizDateAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; public static String dataErrorCheckSqlForVersionAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version`) as stage"; + 
"`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version`) as stage"; - public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE 1 = 1"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE 1 = 1"; + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `LEGEND_PERSISTENCE_COUNT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT` " + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + @@ -547,26 +547,26 @@ public class BigQueryTestArtifacts "FROM `MYDB`.`STAGING` as stage GROUP BY stage.`ID`, stage.`NAME`, stage.`AMOUNT`, stage.`BIZ_DATE`, stage.`DIGEST`) as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_RANK` = 1)"; public static String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; - public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE stage.`legend_persistence_count` > 1 LIMIT 20"; public static String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; - public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT `ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT 
`ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + "as stage GROUP BY `ID`, `NAME`, `BIZ_DATE` HAVING `LEGEND_PERSISTENCE_ERROR_COUNT` > 1 LIMIT 20"; public static String dataErrorsSql = "SELECT `id`,`name`,`version`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; public static String getDropTempTableQuery(String tableName) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index b2d96140351..82ab77db5c0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -60,6 +60,7 @@ public class BulkLoadTest private static final String COL_DECIMAL = "col_decimal"; private static final String COL_DATETIME = "col_datetime"; private static final String COL_VARIANT = "col_variant"; + private static final String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; private static Field col1 = Field.builder() .name(COL_INT) @@ -117,6 +118,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) .batchIdPattern("{NEXT_BATCH_ID}") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -129,14 +131,14 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64,`append_time` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM 
`my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + "(SELECT 'my_name',{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"xyz123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; @@ -191,6 +193,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoEventId() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .putAllAdditionalMetadata(ADDITIONAL_METADATA) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -203,14 +206,14 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoEventId() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64,`append_time` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], compression='GZIP', encoding='UTF8', field_delimiter=',', format='CSV', max_bad_records=100, null_marker='NULL', quote=''', skip_leading_rows=1)"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`, `additional_metadata`) " + "(SELECT 'my_name',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + @@ -258,6 +261,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) .putAllAdditionalMetadata(ADDITIONAL_METADATA) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -270,14 +274,14 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() String expectedCreateTableSql = "CREATE TABLE 
IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`) " + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME') " + - "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; String expectedMetaIngestSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`, `additional_metadata`) " + "(SELECT 'my_name',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME')," + @@ -325,6 +329,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -336,7 +341,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`digest` STRING,`batch_id` INT64,`append_time` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; @@ -345,7 +350,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`," + "LAKEHOUSE_MD5(TO_JSON(STRUCT(legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`)))," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; 
Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, preActionsSql.get(2)); @@ -387,6 +392,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) .caseConversion(CaseConversion.TO_UPPER) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -398,14 +404,14 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `MY_DB`.`MY_NAME`" + "(`COL_INT` INT64,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC(5,2),`COL_DATETIME` DATETIME,`COL_VARIANT` JSON,`DIGEST` STRING,`BATCH_ID` INT64,`APPEND_TIME` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `MY_DB`.`MY_NAME_TEMP_LP_YOSULF` " + "(`COL_INT` INT64,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC,`COL_DATETIME` DATETIME,`COL_VARIANT` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; String expectedInsertSql = "INSERT INTO `MY_DB`.`MY_NAME` " + "(`COL_INT`, `COL_STRING`, `COL_DECIMAL`, `COL_DATETIME`, `COL_VARIANT`, `DIGEST`, `BATCH_ID`, `APPEND_TIME`) " + "(SELECT legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(STRUCT(legend_persistence_temp.`COL_VARIANT`))),(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` as legend_persistence_temp)"; + "FROM `MY_DB`.`MY_NAME_TEMP_LP_YOSULF` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, preActionsSql.get(2)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index 703326e518a..0c2bd02ec81 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -30,10 +30,10 @@ public class NontemporalDeltaTest extends org.finos.legend.engine.persistence.co protected String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; protected String incomingRecordCountWithSplits = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE " + "(stage.`data_split` >= 
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java
index 703326e518a..0c2bd02ec81 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java
@@ -30,10 +30,10 @@ public class NontemporalDeltaTest extends org.finos.legend.engine.persistence.co
    protected String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage";
    protected String incomingRecordCountWithSplits = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE " +
        "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
-    protected String incomingRecordCountWithSplitsTempStaginTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " +
+    protected String incomingRecordCountWithSplitsTempStaginTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " +
        "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
-    protected String incomingRecordCountWithSplitsAndDuplicates = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " +
+    protected String incomingRecordCountWithSplitsAndDuplicates = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " +
        "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
    protected String rowsTerminated = "SELECT 0 as `rowsTerminated`";
@@ -90,7 +90,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe
        List metaIngestSqlList = operations.metadataIngestSql();
        String mergeSql = "MERGE INTO `mydb`.`main` as sink " +
-            "USING `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "USING `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " +
            "WHEN MATCHED AND sink.`digest` <> stage.`digest` " +
            "THEN UPDATE SET " +
@@ -121,7 +121,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe
    public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges)
    {
        String mergeSql = "MERGE INTO `mydb`.`main` as sink " +
-            "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " +
            "as stage ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " +
            "WHEN MATCHED AND sink.`digest` <> stage.`digest` " +
@@ -177,7 +177,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List
    public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges)
    {
        String mergeSql = "MERGE INTO `mydb`.`main` as sink " +
-            "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " +
            "as stage ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " +
            "WHEN MATCHED AND sink.`digest` <> stage.`digest` " +
@@ -364,7 +364,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen
        String mergeSql = "MERGE INTO `mydb`.`main` as sink " +
            "USING " +
-            "`mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "`mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " +
            "WHEN MATCHED AND stage.`version` > sink.`version` " +
            "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`version` = stage.`version`," +
@@ -450,7 +450,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat
        String mergeSql = "MERGE INTO `MYDB`.`MAIN` as sink " +
            "USING " +
-            "`MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " +
+            "`MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " +
            "ON (sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`) " +
            "WHEN MATCHED AND stage.`VERSION` >= sink.`VERSION` " +
            "THEN UPDATE SET sink.`ID` = stage.`ID`,sink.`NAME` = stage.`NAME`,sink.`AMOUNT` = stage.`AMOUNT`,sink.`BIZ_DATE` = stage.`BIZ_DATE`,sink.`DIGEST` = stage.`DIGEST`,sink.`VERSION` = stage.`VERSION`," +
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java
index 7b7297a272f..8bc01638fbe 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java
@@ -62,7 +62,7 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato
            "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`, `batch_id`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)";
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)";

        Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0));
        Assertions.assertEquals(BigQueryTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0));
@@ -85,7 +85,7 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`," +
            "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') FROM " +
-            "`mydb`.`staging_legend_persistence_temp_staging` as stage)";
+            "`mydb`.`staging_temp_staging_lp_yosulf` as stage)";

        Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0));
        Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1));
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java
index 4e4513caa75..b1fdeb17e82 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java
@@ -122,7 +122,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " +
            "WHERE " +
            "(sink.`batch_id_out` = 999999999) AND " +
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " +
            "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))";
@@ -130,7 +130,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," +
-            "999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " +
            "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " +
            "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " +
@@ -157,7 +157,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " +
            "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))";
@@ -165,7 +165,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List
-            "999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " +
            "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " +
            "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " +
@@ -183,7 +183,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))";

        String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " +
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " +
-            "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` " +
+            "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` " +
            "as stage WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " +
            "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))";
@@ -373,7 +373,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene
        Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1));
        Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2));
-        String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " +
+        String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " +
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," +
            "stage.`legend_persistence_count` as `legend_persistence_count` FROM " +
@@ -404,14 +404,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge
        String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " +
            "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " +
            "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " +
-            "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))";

        String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " +
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " +
-            "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` " +
+            "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` " +
            "as stage WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " +
            "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))";
@@ -419,7 +419,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge
        Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1));
        Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2));
-        String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " +
+        String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " +
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," +
            "stage.`legend_persistence_count` as `legend_persistence_count` FROM " +
@@ -487,7 +487,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene
        String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " +
            "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " +
            "WHERE (sink.`batch_id_out` = 999999999) AND " +
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " +
            "(stage.`version` > sink.`version`)))";
@@ -496,7 +496,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," +
            "999999999 " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " +
            "WHERE (sink.`batch_id_out` = 999999999) " +
            "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))";
@@ -519,14 +519,14 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta
            "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA " +
            "as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1 " +
            "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " +
-            "(EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " +
+            "(EXISTS (SELECT * FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " +
            "WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`)))";

        String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " +
            "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " +
            "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," +
            "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " +
-            "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " +
+            "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " +
            "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " +
            "(stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))";
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java
index 9ae66a9d31d..8f22f55b79a 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java
@@ -74,7 +74,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(
            "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," +
            "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " +
            "WHERE (sink.`batch_id_out` = 999999999) AND " +
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " +
            "(sink.`digest` <> stage.`digest`)))";
@@ -83,7 +83,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," +
            "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " +
            "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " +
            "WHERE (sink.`batch_id_out` = 999999999) " +
@@ -101,7 +101,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(
        Assertions.assertEquals(2, operations.size());

        // Stats
-        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
+        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
        String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1";
        String rowsDeleted = "SELECT 0 as `rowsDeleted`";
        String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) as `rowsInserted`";
@@ -195,7 +195,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " +
            "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " +
            "((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))";
@@ -203,7 +203,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " +
            "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " +
            "(sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " +
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java
index 3062b2d59ea..6cf1a5f832e 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java
@@ -71,7 +71,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(
        String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " +
            "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " +
            "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " +
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " +
            "(sink.`digest` <> stage.`digest`)))";
@@ -79,7 +79,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," +
            "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " +
            "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " +
            "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " +
@@ -97,7 +97,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(
        Assertions.assertEquals(2, operations.size());

        // Stats
-        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
+        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
        String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')";
        String rowsDeleted = "SELECT 0 as `rowsDeleted`";
        String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) as `rowsInserted`";
@@ -153,7 +153,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List
-            "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " +
            "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))";
@@ -161,7 +161,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " +
            "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " +
            "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " +
@@ -180,7 +180,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List
 public List getLatestStagingFilters(RelationalConnection connecti
        return ApiUtils.extractDatasetFilters(metadataDataset, executor, physicalPlan);
    }

+    public String getIngestRunId()
+    {
+        return this.ingestRunId;
+    }
+
    // ---------- UTILITY METHODS ----------

    private void validateDatasetsInitialization()
@@ -600,6 +605,9 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets)
        {
            throw new IllegalStateException("Executor not initialized, call init(Connection) before invoking this method!");
        }
+        // 0. Set the run id
+        ingestRunId = UUID.randomUUID().toString();
+
        // 1. Case handling
        enrichedIngestMode = ApiUtils.applyCase(ingestMode(), caseConversion());
        enrichedDatasets = ApiUtils.enrichAndApplyCase(datasets, caseConversion());
@@ -665,6 +673,7 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets)
            .bulkLoadEventIdValue(bulkLoadEventIdValue())
            .batchSuccessStatusValue(batchSuccessStatusValue())
            .sampleRowCount(sampleRowCount())
+            .ingestRunId(ingestRunId)
            .build();

        planner = Planners.get(enrichedDatasets, enrichedIngestMode, generator.plannerOptions(), relationalSink().capabilities());
@@ -756,7 +765,7 @@ private Datasets importExternalDataset(Datasets datasets)
        DatasetReference mainDataSetReference = datasets.mainDataset().datasetReference();
        externalDatasetReference = externalDatasetReference
-            .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : TableNameGenUtils.generateTableName(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING))
+            .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : TableNameGenUtils.generateTableName(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING, ingestRunId))
            .withDatabase(externalDatasetReference.database().isPresent() ? externalDatasetReference.database().get() : mainDataSetReference.database().orElse(null))
            .withGroup(externalDatasetReference.group().isPresent() ? externalDatasetReference.group().get() : mainDataSetReference.group().orElse(null))
            .withAlias(externalDatasetReference.alias().isPresent() ? externalDatasetReference.alias().get() : mainDataSetReference.alias().orElseThrow(RuntimeException::new) + UNDERSCORE + STAGING);
@@ -818,20 +827,6 @@ private Map executeStatisticsPhysicalPlan(Executor
-    executeDeduplicationAndVersioningErrorChecks(Executor executor,
-        Map errorChecksPlan)
-    {
-        Map results = new HashMap<>();
-        for (Map.Entry entry: errorChecksPlan.entrySet())
-        {
-            List result = executor.executePhysicalPlanAndGetResults(entry.getValue());
-            Optional obj = getFirstColumnValue(getFirstRowForFirstResult(result));
-            Object value = obj.orElse(null);
-            results.put(entry.getKey(), value);
-        }
-        return results;
-    }
-
    private Map extractPlaceHolderKeyValues(Datasets datasets, Executor executor,
        Planner planner, Transformer transformer, IngestMode ingestMode, Optional dataSplitRange)
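The ingestor changes just above mint a fresh `ingestRunId` per ingestion (step 0) and thread it into both the generator and any staging table name derived for an external dataset. Assuming `TableNameGenUtils.generateTableName` is a deterministic, statically accessible function of its three arguments (as its usage above suggests), the effect is that two concurrent ingestions against the same staging dataset now resolve to distinct temp table names instead of the single shared `staging_legend_persistence_temp_staging`. A small sketch; the literal "temp_staging" qualifier stands in for the constant used by the library:

    import java.util.UUID;
    import org.finos.legend.engine.persistence.components.util.TableNameGenUtils;

    public class RunScopedTempTableDemo
    {
        public static void main(String[] args)
        {
            // Each ingestion gets its own UUID-based run id, exactly as in step 0 above.
            String runIdA = UUID.randomUUID().toString();
            String runIdB = UUID.randomUUID().toString();
            // The two runs derive different temp staging names (barring a suffix collision),
            // so they no longer trample each other's temp staging table.
            System.out.println(TableNameGenUtils.generateTableName("staging", "temp_staging", runIdA));
            System.out.println(TableNameGenUtils.generateTableName("staging", "temp_staging", runIdB));
        }
    }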
.ingestRunId("075605e3-bada-47d7-9ae9-7138f392fe22") .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -568,7 +569,7 @@ void testAppendOnlyWithUDFDigestGenerationWithFieldsToExclude() throws Exception "LAKEHOUSE_MD5(ARRAY['id','name','income','start_time','expiry_date'],ARRAY[CONVERT(staging.\"id\",VARCHAR),CONVERT(staging.\"name\",VARCHAR),CONVERT(staging.\"income\",VARCHAR),CONVERT(staging.\"start_time\",VARCHAR),CONVERT(staging.\"expiry_date\",VARCHAR)])," + "'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"TEST\".\"staging_legend_persistence_temp_staging\" as staging WHERE (staging.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (staging.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + "FROM \"TEST\".\"staging_temp_staging_lp_yosulf\" as staging WHERE (staging.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (staging.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java index fbd1db21ff2..ce96f7ab760 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java @@ -36,6 +36,7 @@ import org.finos.legend.engine.persistence.components.relational.exception.DataQualityException; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.finos.legend.engine.persistence.components.util.TableNameGenUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -47,7 +48,7 @@ import static org.finos.legend.engine.persistence.components.TestUtils.*; import static org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract.DATA_SPLIT; -import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_STAGING_DATASET_BASE_NAME; +import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_STAGING_DATASET_QUALIFIER; public class TestDedupAndVersioning extends BaseTest { @@ -105,8 +106,6 @@ public class TestDedupAndVersioning extends BaseTest .addFields(batch) .build(); - private static final String tempStagingTableName = stagingTableName + "_" + TEMP_STAGING_DATASET_BASE_NAME; - String[] schemaWithCount = new String[]{idName, nameName, incomeName, expiryDateName, digestName, "legend_persistence_count"}; String[] schemaWithVersion = new String[]{idName, nameName, versionName, 
incomeName, expiryDateName, digestName}; String[] schemaWithVersionAndCount = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName, "legend_persistence_count"}; @@ -126,9 +125,9 @@ void testNoDedupNoVersioning() .auditing(NoAuditing.builder().build()) .build(); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); } // Scenario 2 @@ -143,9 +142,9 @@ void testNoDedupMaxVersioningDoNotPerform() .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) .build(); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); } // Scenario 3 @@ -166,15 +165,15 @@ void testNoDedupMaxVersioning() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_max_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersion); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersion, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -205,9 +204,9 @@ void testNoDedupAllVersioningDoNotPerform() .versioningStrategy(AllVersionsStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) .build(); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); } // Scenario 5 @@ -230,15 +229,15 @@ void testNoDedupAllVersion() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_all_version.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersionAndDataSplit); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndDataSplit, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); 
Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -271,9 +270,9 @@ void testFilterDupsNoVersioning() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data1_filter_dups_no_versioning.csv"; loadDataIntoStagingTableWithoutVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithCount); + verifyResults(expectedDataPath, schemaWithCount, ingestRunId); } // Scenario 7 @@ -294,8 +293,8 @@ void testFilterDupsMaxVersionDoNotPerform() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersionAndCount); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); } @@ -317,15 +316,15 @@ void testFilterDupsMaxVersion() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_max_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersionAndCount); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -362,9 +361,9 @@ void testFilterDupsAllVersionDoNotPerform() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); } // Scenario 10 @@ -387,16 +386,16 @@ void testFilterDupsAllVersion() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_all_version.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit); + verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch 
(DataQualityException e) @@ -430,7 +429,7 @@ void testFailOnDupsNoVersioning() throws Exception try { - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -470,9 +469,9 @@ void testFailOnDupsMaxVersionDoNotPerform() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Duplicates scenario, should throw error @@ -480,7 +479,7 @@ void testFailOnDupsMaxVersionDoNotPerform() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -515,9 +514,9 @@ void testFailOnDupsMaxVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_max_versioin.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Duplicates scenario, should throw error @@ -525,7 +524,7 @@ void testFailOnDupsMaxVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -563,9 +562,9 @@ void testFailOnDupsAllVersionDoNotPerform() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Duplicates scenario, should throw error @@ -573,7 +572,7 @@ void testFailOnDupsAllVersionDoNotPerform() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -610,9 +609,9 @@ void testFailOnDupsAllVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit); + verifyResults(expectedDataPath, 
schemaWithVersionCountAndDataSplit, ingestRunId); // Duplicates scenario, should throw error @@ -620,7 +619,7 @@ void testFailOnDupsAllVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } catch (DataQualityException e) @@ -646,11 +645,11 @@ public static DatasetDefinition getStagingTableWithoutVersion() .build(); } - private Dataset getTempStagingDataset() + private Dataset getTempStagingDataset(String ingestRunId) { return DatasetReferenceImpl.builder() .group(testSchemaName) - .name(tempStagingTableName) + .name(getTempStagingTableName(ingestRunId)) .build(); } @@ -687,7 +686,7 @@ public static void createStagingTableWithVersion() h2Sink.executeStatement(createSql); } - private static void performDedupAndVersioining(Datasets datasets, IngestMode ingestMode) + private static String performDedupAndVersioining(Datasets datasets, IngestMode ingestMode) { RelationalIngestor ingestor = RelationalIngestor.builder() .ingestMode(ingestMode) @@ -698,6 +697,7 @@ private static void performDedupAndVersioining(Datasets datasets, IngestMode ing ingestor.initDatasets(datasets); ingestor.create(); ingestor.dedupAndVersion(); + return ingestor.getIngestRunId(); } public static void loadDataIntoStagingTableWithoutVersion(String path) throws Exception @@ -730,10 +730,15 @@ public static void loadDataIntoStagingTableWithVersionAndBatch(String path) thro h2Sink.executeStatement(loadSql); } - private void verifyResults(String expectedDataPath, String [] schema) throws IOException + private void verifyResults(String expectedDataPath, String [] schema, String ingestRunId) throws IOException { - Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset())); - List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", tempStagingTableName)); + Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); + List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", getTempStagingTableName(ingestRunId))); TestUtils.assertFileAndTableDataEquals(schema, expectedDataPath, tableData); } + + private String getTempStagingTableName(String ingestRunId) + { + return TableNameGenUtils.generateTableName(stagingTableName, TEMP_STAGING_DATASET_QUALIFIER, ingestRunId); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index ab5d15c288d..2585bf392f6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -69,7 +69,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(`id`, `name`, `amount`, `biz_date`, `digest`, 
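Condensing the TestDedupAndVersioning changes above: every scenario now follows the same pattern of running the dedup/version step, capturing the run id, and deriving the run-scoped temp staging name from it. A trimmed sketch of that flow, mirroring the helper and constant names in this file (builder options besides the two shown are elided, as in the patch):

    RelationalIngestor ingestor = RelationalIngestor.builder()
        .ingestMode(ingestMode)
        .relationalSink(H2Sink.get())
        .build();
    ingestor.initDatasets(datasets);
    ingestor.create();
    ingestor.dedupAndVersion();

    // The run id is minted inside the ingestor, so tests fetch it afterwards to locate
    // the temp staging table that this particular run created (or verify its absence).
    String ingestRunId = ingestor.getIngestRunId();
    String tempStagingTableName = TableNameGenUtils.generateTableName(stagingTableName, TEMP_STAGING_DATASET_QUALIFIER, ingestRunId);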
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java
index ab5d15c288d..2585bf392f6 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java
@@ -69,7 +69,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))";

        Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0));
@@ -86,7 +86,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti
        Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableIngestQuery, generatorResults.get(0).metadataIngestSql().get(0));

        // Stats
-        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
        Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT));
        Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT));
@@ -107,7 +107,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterEx
        String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " +
            "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))";
@@ -142,7 +142,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_number`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " +
            "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " +
            "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " +
@@ -162,7 +162,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis
        Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableIngestQueryWithAdditionalMetadata, operations.get(0).metadataIngestSql().get(0));

        // Stats
-        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+        String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')";
        String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_number` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')";
        Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT));
@@ -183,7 +183,7 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations)
            "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_UPDATE_TIME`, `BATCH_ID`) " +
            "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,'2000-01-01 00:00:00.000000'," +
            "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN') " +
-            "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " +
+            "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " +
            "WHERE NOT (EXISTS " +
            "(SELECT * FROM `MYDB`.`MAIN` as sink WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))";
@@ -200,7 +200,7 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations)
        String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `digest`, `batch_update_time`, `batch_id`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,'2000-01-01 00:00:00.000000'," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " +
            "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))";
@@ -219,7 +219,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExis
        String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " +
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " +
            "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " +
            "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))";
@@ -254,7 +254,7 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco
            "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " +
            "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," +
            "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " +
-            "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)";
+            "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)";

        Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0));
        Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1));
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java
index 3ff5c9baa25..e8fa3a61038 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java
@@ -66,14 +66,14 @@ public class MemsqlTestArtifacts
        "`batch_id` INTEGER," +
        "PRIMARY KEY (`id`, `name`))";

-    public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" +
+    public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" +
        "(`id` INTEGER NOT NULL," +
        "`name` VARCHAR(256) NOT NULL," +
        "`amount` DOUBLE," +
        "`biz_date` DATE," +
        "`legend_persistence_count` INTEGER)";

-    public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" +
+    public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" +
        "(`id` INTEGER NOT NULL," +
        "`name` VARCHAR(256) NOT NULL," +
        "`amount` DOUBLE," +
@@ -82,7 +82,7 @@ public class MemsqlTestArtifacts
        "`version` INTEGER," +
        "`legend_persistence_count` INTEGER)";

-    public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" +
+    public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" +
        "(`id` INTEGER NOT NULL," +
        "`name` VARCHAR(256) NOT NULL," +
        "`amount` DOUBLE," +
@@ -90,7 +90,7 @@ public class MemsqlTestArtifacts
        "`digest` VARCHAR(256)," +
        "`legend_persistence_count` INTEGER)";

-    public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" +
+    public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" +
        "(`id` INTEGER NOT NULL," +
        "`name` VARCHAR(256) NOT NULL," +
        "`amount` DOUBLE," +
@@ -181,7 +181,7 @@ public class MemsqlTestArtifacts
        "PRIMARY KEY (`id`, `name`, `batch_update_time`))";

    public static String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage";
-    public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_legend_persistence_temp_staging` as stage";
+    public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage";

    public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging` CASCADE";
@@ -467,7 +467,7 @@ public class MemsqlTestArtifacts
        "`delete_indicator` VARCHAR(256)," +
        "PRIMARY KEY (`id`, `name`, `validity_from_reference`))";

-    public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " +
+    public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " +
        "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`) " +
        "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " +
        "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`," +
@@ -476,13 +476,13 @@ public class MemsqlTestArtifacts
        "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage) " +
        "as stage WHERE stage.`legend_persistence_rank` = 1)";

-    public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " +
+    public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " +
        "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " +
        "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," +
        "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " +
        "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`)";

-    public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " +
+    public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " +
        "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " +
        "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " +
        "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " +
@@ -491,32 +491,32 @@ public class MemsqlTestArtifacts
        "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " +
        "WHERE stage.`legend_persistence_rank` = 1)";

-    public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO
`mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, `data_split`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage"; public static String dataErrorCheckSqlForBizDateAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; public static String dataErrorCheckSqlForVersionAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version`) as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version`) as stage"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; - public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage"; + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `LEGEND_PERSISTENCE_COUNT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT` " + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + @@ -526,17 +526,17 @@ public class MemsqlTestArtifacts "FROM `MYDB`.`STAGING` as stage GROUP BY stage.`ID`, stage.`NAME`, 
stage.`AMOUNT`, stage.`BIZ_DATE`, stage.`DIGEST`) as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_RANK` = 1)"; public static String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; - public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE stage.`legend_persistence_count` > 1 LIMIT 20"; public static String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; - public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT `ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT `ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + "as stage GROUP BY `ID`, `NAME`, `BIZ_DATE` HAVING `LEGEND_PERSISTENCE_ERROR_COUNT` > 1 LIMIT 20"; public static String dataErrorsSql = "SELECT `id`,`name`,`version`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index 6dc0d9a95f1..abdb2dca087 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -32,10 +32,10 @@ public class NontemporalDeltaTest extends NontemporalDeltaTestCases "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String incomingRecordCountWithSplitsAndDuplicates = 
"SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - protected String incomingRecordCountWithSplitsTempTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + protected String incomingRecordCountWithSplitsTempTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String rowsTerminated = "SELECT 0 as `rowsTerminated`"; protected String rowsDeleted = "SELECT 0 as `rowsDeleted`"; @@ -86,7 +86,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe List metaIngestSqlList = operations.metadataIngestSql(); String updateSql = "UPDATE `mydb`.`main` as sink " + - "INNER JOIN `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "INNER JOIN `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) " + "SET sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + @@ -100,7 +100,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; @@ -121,7 +121,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) " + "SET sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + @@ -133,7 +133,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -197,7 +197,7 @@ public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + 
"ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) SET " + "sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + @@ -210,7 +210,7 @@ public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -425,7 +425,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen String updateSql = "UPDATE `mydb`.`main` as sink " + "INNER JOIN " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage " + + "`mydb`.`staging_temp_staging_lp_yosulf` as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`) " + "SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`version` = stage.`version`," + "sink.`batch_id` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')"; @@ -433,7 +433,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQuery, preActionsSqlList.get(0)); @@ -524,7 +524,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat String updateSql = "UPDATE `MYDB`.`MAIN` as sink " + "INNER JOIN " + - "`MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "`MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "ON ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`) " + "SET sink.`ID` = stage.`ID`,sink.`NAME` = stage.`NAME`,sink.`AMOUNT` = stage.`AMOUNT`,sink.`BIZ_DATE` = stage.`BIZ_DATE`,sink.`DIGEST` = stage.`DIGEST`,sink.`VERSION` = stage.`VERSION`," + "sink.`BATCH_ID` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')"; @@ -532,7 +532,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN') " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`ID` = 
stage.`ID`) AND (sink.`NAME` = stage.`NAME`))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase, preActionsSqlList.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java index 96f9b10b58a..4682a936879 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java @@ -67,7 +67,7 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); @@ -90,14 +90,14 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; String maxDataErrorCheckSql = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`amount`)) as `legend_persistence_distinct_rows` " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`amount`)) as `legend_persistence_error_count` " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index a6f5014a130..e01fe7b8838 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -121,7 +121,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -129,7 +129,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -156,7 +156,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -164,7 +164,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ -182,7 +182,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List sink.`version`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE " + - "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -372,7 +372,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + @@ -403,14 +403,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE " + - "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -418,7 +418,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, 
`amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + @@ -486,7 +486,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(stage.`version` > sink.`version`)))"; @@ -495,7 +495,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999 " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -517,14 +517,14 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink " + "SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1 WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + - "(EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + + "(EXISTS (SELECT * FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE ((sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`)))"; String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + "(stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 3bedbebbb25..726ff04cca4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -74,7 +74,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -83,7 +83,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -101,7 +101,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE 
sink.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) as `rowsInserted`"; @@ -195,7 +195,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -203,7 +203,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java index ec8d442fc5e..87dbd91b175 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java @@ -72,7 +72,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -80,7 +80,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE 
(sink.`batch_time_out` = '9999-12-31 23:59:59') " + @@ -98,7 +98,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000'"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000') as `rowsInserted`"; @@ -154,7 +154,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -162,7 +162,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND (sink.`digest` = stage.`digest`) " + @@ -181,7 +181,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); @@ -100,7 +101,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('name',stage.\"name\",'biz_date',stage.\"biz_date\"))," + "'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), generatorResults.get(0).preActionsSql().get(1)); @@ -116,7 +117,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), generatorResults.get(0).metadataIngestSql().get(0)); 
// Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index d0db719fb61..126e860bc58 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -52,6 +52,8 @@ public class BulkLoadTest { private static final String APPEND_TIME = "append_time"; + private static final String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; + private static Field col1 = Field.builder() .name("col_int") @@ -110,6 +112,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") .batchIdPattern("{NEXT_BATCH_ID}") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -134,9 +137,9 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() String expectedMetadataIngestSql = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + "(SELECT 'my_name',{NEXT_BATCH_ID},'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"task123\",\"file_patterns\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; - String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation\"" + + String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; - String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation\" FROM my_location " + + String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'ABORT_STATEMENT' " + @@ -187,6 +190,7 @@ public void 
testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) .batchSuccessStatusValue("SUCCEEDED") .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -250,6 +254,7 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -350,6 +355,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); @@ -390,6 +396,7 @@ public void testBulkLoadWithDigest() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -451,6 +458,7 @@ public void testBulkLoadWithDigestAndForceOption() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -521,6 +529,7 @@ public void testBulkLoadWithDigestWithFieldsToExcludeAndForceOption() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -585,6 +594,7 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -606,9 +616,9 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() "FILE_FORMAT = (FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'SKIP_FILE'"; - String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation\"" + + String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; - String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation\" FROM my_location " + + String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'SKIP_FILE' " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java index e05ebb63bae..9f4948be9f2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java @@ -77,7 +77,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe List metaIngestSqlList = operations.metadataIngestSql(); String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + - "USING \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "USING \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + "THEN UPDATE SET " + @@ -107,7 +107,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + - "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + @@ -161,7 +161,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + - "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + @@ -343,7 +343,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + "USING " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND stage.\"version\" > sink.\"version\" " + "THEN UPDATE SET sink.\"id\" = stage.\"id\",sink.\"name\" = stage.\"name\",sink.\"amount\" = 
stage.\"amount\",sink.\"biz_date\" = stage.\"biz_date\",sink.\"digest\" = stage.\"digest\",sink.\"version\" = stage.\"version\"," + @@ -426,7 +426,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat String mergeSql = "MERGE INTO \"MYDB\".\"MAIN\" as sink " + "USING " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "ON (sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\") " + "WHEN MATCHED AND stage.\"VERSION\" >= sink.\"VERSION\" " + "THEN UPDATE SET sink.\"ID\" = stage.\"ID\",sink.\"NAME\" = stage.\"NAME\",sink.\"AMOUNT\" = stage.\"AMOUNT\",sink.\"BIZ_DATE\" = stage.\"BIZ_DATE\",sink.\"DIGEST\" = stage.\"DIGEST\",sink.\"VERSION\" = stage.\"VERSION\"," + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index b1aea5f0e4a..fb5e9b2d025 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -64,6 +64,7 @@ public class BaseTest protected String mainDbName = "mydb"; protected String mainTableName = "main"; protected String mainTableAlias = "sink"; + protected String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; protected String stagingDbName = "mydb"; protected String stagingTableName = "staging"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java index 2ed71270233..27c2dce46ed 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java @@ -81,6 +81,7 @@ void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords(operations, dataSplitRangesOneToTwo); @@ -98,6 +99,7 @@ void 
testAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRec .cleanupStagingData(true) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult queries = generator.generateOperations(scenario.getDatasets()); @@ -135,6 +137,7 @@ public void testAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExisti .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); @@ -152,6 +155,7 @@ void testAppendOnlyWithUpperCaseOptimizer() .relationalSink(getRelationalSink()) .caseConversion(CaseConversion.TO_UPPER) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); @@ -171,6 +175,7 @@ void testAppendOnlyWithLessColumnsInStaging() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -188,6 +193,7 @@ void testAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecor .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); @@ -205,6 +211,7 @@ void testAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords() .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); @@ -276,6 +283,7 @@ public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisting .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java index b9b5332c9bb..5bb9569ab37 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java @@ -82,6 +82,7 @@ void testNontemporalDeltaWithAuditingFilterDupsNoVersioning() .relationalSink(getRelationalSink()) 
.executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); @@ -99,6 +100,7 @@ void testNonTemporalDeltaNoAuditingNoDedupAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(testScenario.getDatasets(), dataSplitRangesOneToTwo); @@ -133,6 +135,7 @@ void testNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(testScenario.getDatasets(), dataSplitRangesOneToTwo); @@ -283,6 +286,7 @@ void testNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters() .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(false) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); @@ -337,6 +341,7 @@ void testNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase() .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .sampleRowCount(10) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java index d88af9bd1b4..e657d5abaa3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java @@ -70,6 +70,7 @@ void testNontemporalSnapshotWithAuditingFilterDupsNoVersioning() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); @@ -87,6 +88,7 @@ void testNontemporalSnapshotWithAuditingFailOnDupMaxVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java index 4cb7b8dd3ac..8425f1b8523 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java @@ -82,6 +82,7 @@ void testUnitemporalDeltaWithDeleteIndFilterDupsNoVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(operations); @@ -98,6 +99,7 @@ void testUnitemporalDeltaWithDeleteIndNoDedupAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(operations, dataSplitRangesOneToTwo); @@ -305,6 +307,7 @@ void testUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(false) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(operations); @@ -322,6 +325,7 @@ void testUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(false) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(operations); @@ -358,6 +362,7 @@ void testUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform() .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(true) .batchSuccessStatusValue("SUCCEEDED") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(operations); @@ -378,6 +383,7 @@ void testUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilte .caseConversion(CaseConversion.TO_UPPER) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) .sampleRowCount(10) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(operations); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java index 47aa50fbc7b..0cb41f6eb10 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java @@ -62,6 +62,7 @@ void testUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(operations, dataSplitRangesOneToTwo); @@ -110,6 +111,7 @@ void testUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(operations, dataSplitRangesOneToTwo); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java index cf3d7e63ca1..7478a2257de 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java @@ -60,6 +60,7 @@ void testUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); 
verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(operations, dataSplitRangesOneToTwo); @@ -92,6 +93,7 @@ void testUnitemporalDeltaWithDeleteIndFilterDupsAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(operations, dataSplitRangesOneToTwo); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java index 29617a200a8..c5b3e4c4551 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java @@ -67,6 +67,7 @@ void testUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion() .collectStatistics(true) .createStagingDataset(true) .enableConcurrentSafety(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java index 1d44bb7b696..2658756a05a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java @@ -63,6 +63,7 @@ void testUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); 
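// Note on the fixed ingestRunId wired into each generator above: the run id seeds
// TableNameGenUtils.generateTableName(...) (used later in this series) to derive
// temp/validation table names such as "STAGING_TEMP_STAGING_LP_YOSULF" in the
// expected MERGE SQL. Pinning the run id in BaseTest keeps those generated names
// stable so the tests can assert on them. A minimal, self-contained sketch of such
// a derivation follows; the real hashing scheme inside TableNameGenUtils is not
// shown in this patch, so deriveSuffix below is purely hypothetical:
public class TableSuffixSketch
{
    // Hypothetical: map a run id to a short, deterministic, SQL-safe suffix.
    static String deriveSuffix(String ingestRunId)
    {
        int h = ingestRunId.hashCode();
        StringBuilder suffix = new StringBuilder("lp_");
        for (int i = 0; i < 6; i++)
        {
            // Six lowercase letters keep the generated name a valid identifier.
            suffix.append((char) ('a' + Math.abs(h % 26)));
            h = h / 26;
        }
        return suffix.toString();
    }

    public static void main(String[] args)
    {
        String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; // same fixed id as BaseTest
        // Same run id and same base name give the same table name on every run.
        System.out.println("staging_temp_staging_" + deriveSuffix(ingestRunId));
    }
}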
verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(operations); @@ -98,6 +99,7 @@ void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxV .caseConversion(CaseConversion.TO_UPPER) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) .batchSuccessStatusValue("SUCCEEDED") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java index 13bc1fca81d..68f94126a17 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java @@ -60,6 +60,7 @@ void testUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(operations); From 313e0df5696052a073be09c9f3e1b710e0d20e04 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 28 Feb 2024 15:40:02 +0800 Subject: [PATCH 12/32] Change logical try_cast to cast in non safe-cast flow + refactor --- .../logicalplan/operations/CopyAbstract.java | 3 +- .../values/CastFunctionAbstract.java | 35 ++++++++++++++++ .../components/planner/BulkLoadPlanner.java | 42 +++++++++++-------- .../components/relational/h2/H2Sink.java | 6 +-- ...nVisitor.java => CastFunctionVisitor.java} | 12 +++--- .../snowflake/sql/visitor/CopyVisitor.java | 4 +- 6 files changed, 71 insertions(+), 31 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/{TryCastFunctionVisitor.java => CastFunctionVisitor.java} (73%) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java index 27f62edbae0..cadd43c5839 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java @@ -21,7 +21,6 @@ import java.util.List; import static org.immutables.value.Value.Immutable; -import static org.immutables.value.Value.Parameter; import static org.immutables.value.Value.Style; @Immutable @@ -43,7 +42,7 @@ public interface CopyAbstract extends Operation StagedFilesDatasetProperties stagedFilesDatasetProperties(); @org.immutables.value.Value.Default - default boolean dryRun() // TODO: rename this to validationModeSupported + default boolean validationMode() { return false; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java new file mode 100644 index 00000000000..6e7677776e8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java @@ -0,0 +1,35 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
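// Like the other *Abstract value types in this module, the @Immutable/@Style pair
// below makes the Immutables annotation processor generate a concrete, buildable
// CastFunction class. A usage sketch, mirroring how BulkLoadPlanner constructs
// this value later in this patch (here "field" stands for any logical-plan Field
// and "dataset" for its owning Dataset):
//
//     Value castValue = CastFunction.builder()
//         .field(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build())
//         .type(field.type())
//         .build();
//
// With strictBuilder = true, each attribute must be set exactly once before build().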
+ +package org.finos.legend.engine.persistence.components.logicalplan.values; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface CastFunctionAbstract extends Value +{ + Value field(); + + FieldType type(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 0b690590ca5..d474c20a30e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -37,6 +37,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.CastFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.logicalplan.values.All; @@ -70,6 +71,7 @@ class BulkLoadPlanner extends Planner private boolean transformWhileCopy; private Dataset externalDataset; + private Dataset validationDataset; private StagedFilesDataset stagedFilesDataset; private static final String FILE = "FILE"; @@ -99,6 +101,11 @@ class BulkLoadPlanner extends Planner .alias(TEMP_DATASET_BASE_NAME) .build(); } + + if (capabilities.contains(Capability.DRY_RUN)) + { + validationDataset = stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported() ? 
getValidationDataset() : getValidationDatasetWithMetaColumns(); + } } private void validateNoPrimaryKeysInStageAndMain() @@ -140,19 +147,16 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) List operations = new ArrayList<>(); if (stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) { - Dataset validationDataset = getValidationDataset(); Copy copy = Copy.builder() .targetDataset(validationDataset) .sourceDataset(stagedFilesDataset.datasetReference().withAlias("")) .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()) - .dryRun(true) + .validationMode(true) .build(); operations.add(copy); } else { - Dataset validationDataset = getValidationDatasetWithMetaColumns(); - List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), true); fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); fieldsToSelect.add(MetadataRowNumberField.builder().build()); @@ -168,7 +172,7 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) .sourceDataset(selectStage) .addAllFields(fieldsToInsert) .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()) - .dryRun(false) + .validationMode(false) .build(); operations.add(copy); } @@ -182,9 +186,6 @@ public Map, LogicalPlan>> buildLogicalPl { return Collections.emptyMap(); } - - Dataset validationDataset = getValidationDatasetWithMetaColumns(); - Map, LogicalPlan>> validationMap = new HashMap<>(); List fieldsToCheckForNull = stagingDataset().schema().fields().stream().filter(field -> !field.nullable()).collect(Collectors.toList()); List fieldsToCheckForDatatype = stagingDataset().schema().fields().stream().filter(field -> !DataType.isStringDatatype(field.type().dataType())).collect(Collectors.toList()); @@ -217,8 +218,7 @@ public Map, LogicalPlan>> buildLogicalPl validationMap.put(ValidationCategory.DATATYPE_CONVERSION, new HashMap<>()); for (Field fieldToCheckForDatatype : fieldsToCheckForDatatype) { - // TODO: change this to use cast - since we know at this point try cast is not possible - Selection queryForDatatype = getSelectColumnsWithTryCast(validationDataset, Collections.singletonList(fieldToCheckForDatatype)); + Selection queryForDatatype = getSelectColumnsWithCast(validationDataset, fieldToCheckForDatatype); validationMap.get(ValidationCategory.DATATYPE_CONVERSION).put(Stream.of(fieldToCheckForDatatype).map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), LogicalPlan.of(Collections.singletonList(queryForDatatype))); } @@ -230,6 +230,7 @@ public Map, LogicalPlan>> buildLogicalPl private Selection getSelectColumnsWithTryCast(Dataset dataset, List fieldsToCheckForDatatype) { + // When using TRY_CAST, we can check all columns at once, as the query will not fail return Selection.builder() .source(dataset) .condition(Or.of(fieldsToCheckForDatatype.stream().map(field -> And.builder() @@ -240,6 +241,18 @@ private Selection getSelectColumnsWithTryCast(Dataset dataset, List field .build(); } + private Selection getSelectColumnsWithCast(Dataset dataset, Field fieldToCheckForDatatype) + { + // When using CAST, we have to check column by column, as the query may fail and we need to know which column caused the problem + return Selection.builder() + .source(dataset) + .condition(And.builder() +
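// Illustrative shapes of the two validation queries (a sketch only; the exact SQL
// depends on the sink's transformer, and the column names are borrowed from the
// Snowflake tests later in this patch):
//
//   TRY_CAST, one query covering all columns, since a bad value becomes NULL and
//   the query itself never fails:
//     SELECT * FROM "my_db"."my_name_validation_lp_yosulf"
//     WHERE ("col_int" IS NOT NULL AND TRY_CAST("col_int" AS INTEGER) IS NULL)
//        OR ("col_integer" IS NOT NULL AND TRY_CAST("col_integer" AS INTEGER) IS NULL)
//
//   CAST, one query per column, since a bad value aborts the whole query and the
//   failing column must be isolated:
//     SELECT * FROM "my_db"."my_name_validation_lp_yosulf"
//     WHERE "col_int" IS NOT NULL AND CAST("col_int" AS INTEGER) IS NULL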
.addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build()))) + .addConditions(IsNull.of(CastFunction.builder().field(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build()).type(fieldToCheckForDatatype.type()).build())) + .build()) + .build(); + } + private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) { List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), false); @@ -316,14 +329,7 @@ public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources) List operations = new ArrayList<>(); if (capabilities.contains(Capability.DRY_RUN)) { - if (stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) - { - operations.add(Create.of(true, getValidationDataset())); - } - else - { - operations.add(Create.of(true, getValidationDatasetWithMetaColumns())); - } + operations.add(Create.of(true, validationDataset)); } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index dffa2c3744f..1f52aa8add5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -31,6 +31,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.LoadCsv; +import org.finos.legend.engine.persistence.components.logicalplan.values.CastFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.HashFunction; @@ -38,7 +39,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -67,7 +67,7 @@ import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesSelectionVisitor; import 
org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ToArrayFunctionVisitor; -import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.TryCastFunctionVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.CastFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.Transformer; @@ -136,7 +136,7 @@ public class H2Sink extends AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); logicalPlanVisitorByClass.put(ToArrayFunction.class, new ToArrayFunctionVisitor()); - logicalPlanVisitorByClass.put(TryCastFunction.class, new TryCastFunctionVisitor()); + logicalPlanVisitorByClass.put(CastFunction.class, new CastFunctionVisitor()); logicalPlanVisitorByClass.put(MetadataFileNameField.class, new MetadataFileNameFieldVisitor()); logicalPlanVisitorByClass.put(MetadataRowNumberField.class, new MetadataRowNumberFieldVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/CastFunctionVisitor.java similarity index 73% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/CastFunctionVisitor.java index 46538b40475..eaf5a1a24c7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/CastFunctionVisitor.java @@ -15,11 +15,10 @@ package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; -import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; +import org.finos.legend.engine.persistence.components.logicalplan.values.CastFunction; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; -import 
org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction; import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; @@ -27,17 +26,18 @@ import java.util.ArrayList; import java.util.List; -public class TryCastFunctionVisitor implements LogicalPlanVisitor +public class CastFunctionVisitor implements LogicalPlanVisitor { @Override - public VisitorResult visit(PhysicalPlanNode prev, TryCastFunction current, VisitorContext context) + public VisitorResult visit(PhysicalPlanNode prev, CastFunction current, VisitorContext context) { DataType dataType = new H2DataTypeMapping().getDataType(current.type()); - CastFunction castFunction = new CastFunction(dataType, context.quoteIdentifier()); + org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction castFunction + = new org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction(dataType, context.quoteIdentifier()); for (Optimizer optimizer : context.optimizers()) { - castFunction = (CastFunction) optimizer.optimize(castFunction); + castFunction = (org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction) optimizer.optimize(castFunction); } prev.push(castFunction); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java index 2971fc677a5..12f717d070f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java @@ -57,7 +57,7 @@ private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProper copyStatement.setFilePatterns(properties.filePatterns()); copyStatement.setFilePaths(properties.filePaths()); - if (current.dryRun()) + if (current.validationMode()) { copyStatement.setValidationMode("RETURN_ERRORS"); } @@ -83,7 +83,7 @@ else if (format instanceof StandardFileFormat) { StandardFileFormat standardFileFormat = (StandardFileFormat) format; Map formatOptions = new HashMap<>(standardFileFormat.formatOptions()); - if (current.dryRun() && standardFileFormat.formatType().equals(FileFormatType.CSV)) + if (current.validationMode() && standardFileFormat.formatType().equals(FileFormatType.CSV)) { formatOptions.put("ERROR_ON_COLUMN_COUNT_MISMATCH", false); } From ca4aef59c7b498691fcfbfe0c28a77c0585ac1a7 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 28 Feb 2024 16:48:42 +0800 Subject: [PATCH 13/32] Add cleanup operations for dry run --- .../components/planner/BulkLoadPlanner.java | 22 ++++++++++++++----- 
.../components/planner/Planner.java | 5 +++++ .../api/GeneratorResultAbstract.java | 7 ++++++ .../api/RelationalGeneratorAbstract.java | 7 +++++- .../api/RelationalIngestorAbstract.java | 5 +++-- .../ingestmode/bulkload/BulkLoadTest.java | 2 ++ .../components/ingestmode/BulkLoadTest.java | 16 ++++++++++++-- 7 files changed, 54 insertions(+), 10 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index fb6d2d02535..3744ee67fe0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -29,11 +29,10 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.ExternalDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetAdditionalProperties; import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.TableType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; @@ -159,6 +158,9 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) } else { + // As data is actually being loaded in this case, we delete all data from the validation dataset first to ensure we start from a clean slate + operations.add(Delete.builder().dataset(validationDataset).build()); + List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), true); fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); fieldsToSelect.add(MetadataRowNumberField.builder().build()); @@ -355,6 +357,17 @@ public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) return LogicalPlan.of(operations); } + @Override + public LogicalPlan buildLogicalPlanForDryRunPostCleanup(Resources resources) + { + List operations = new ArrayList<>(); + if (capabilities.contains(Capability.DRY_RUN)) + { + operations.add(Drop.of(true, validationDataset, false)); + } + return LogicalPlan.of(operations); + } + @Override List getDigestOrRemainingColumns() { @@ -418,13 +431,13 @@ private Dataset getValidationDataset()
.database(mainDataset().datasetReference().database()) .group(mainDataset().datasetReference().group()) .name(validationDatasetName) - .datasetAdditionalProperties(DatasetAdditionalProperties.builder().tableType(TableType.TEMPORARY).build()) .build(); } private Dataset getValidationDatasetWithMetaColumns() { String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); + String validationDatasetName = TableNameGenUtils.generateTableName(tableName, "validation", options().ingestRunId()); List fields = stagedFilesDataset.schema().fields().stream().map(field -> field.withType(FieldType.builder().dataType(DataType.VARCHAR).build()).withNullable(true)).collect(Collectors.toList()); fields.add(Field.builder().name(FILE).type(FieldType.builder().dataType(DataType.VARCHAR).build()).build()); @@ -434,8 +447,7 @@ private Dataset getValidationDatasetWithMetaColumns() .schema(stagedFilesDataset.schema().withFields(fields)) .database(mainDataset().datasetReference().database()) .group(mainDataset().datasetReference().group()) - .name(tableName + UNDERSCORE + "validation") // TODO legend_persistence - .datasetAdditionalProperties(DatasetAdditionalProperties.builder().tableType(TableType.TEMPORARY).build()) + .name(validationDatasetName) .build(); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 1739450901f..4c2f2b56003 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -287,6 +287,11 @@ public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources) return LogicalPlan.of(Collections.emptyList()); } + public LogicalPlan buildLogicalPlanForDryRunPostCleanup(Resources resources) + { + return LogicalPlan.of(Collections.emptyList()); + } + public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) { // Save staging filters into batch_source_info column diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 250191dfae1..7a22f30d3c3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -72,6 +72,8 @@ public abstract class GeneratorResultAbstract public abstract Optional postCleanupSqlPlan(); + public abstract SqlPlan dryRunPostCleanupSqlPlan(); + public abstract Map deduplicationAndVersioningErrorChecksSqlPlan(); public abstract Map preIngestStatisticsSqlPlan(); @@ -149,6 +151,11 @@ public List postCleanupSql() return postCleanupSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); } + public List dryRunPostCleanupSql() + { + return dryRunPostCleanupSqlPlan().getSqlList(); + } + public Map preIngestStatisticsSql() { return preIngestStatisticsSqlPlan().keySet().stream() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index d474fd26cbf..c623ff8f00e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -256,7 +256,6 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann acquireLockSqlPlan = Optional.of(transformer.generatePhysicalPlan(acquireLockLogicalPlan)); } - // schema evolution Optional schemaEvolutionSqlPlan = Optional.empty(); Optional schemaEvolutionDataset = Optional.empty(); @@ -318,6 +317,7 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan postActionsLogicalPlan = planner.buildLogicalPlanForPostActions(resources); SqlPlan postActionsSqlPlan = transformer.generatePhysicalPlan(postActionsLogicalPlan); + // post-cleanup LogicalPlan postCleanupLogicalPlan = planner.buildLogicalPlanForPostCleanup(resources); Optional postCleanupSqlPlan = Optional.empty(); if (postCleanupLogicalPlan != null) @@ -325,6 +325,10 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann postCleanupSqlPlan = Optional.of(transformer.generatePhysicalPlan(postCleanupLogicalPlan)); } + // dry-run post-cleanup + LogicalPlan dryRunPostCleanupLogicalPlan = planner.buildLogicalPlanForDryRunPostCleanup(resources); + SqlPlan dryRunPostCleanupSqlPlan = transformer.generatePhysicalPlan(dryRunPostCleanupLogicalPlan); + // post-run statistics Map postIngestStatisticsLogicalPlan = planner.buildLogicalPlanForPostRunStatistics(resources); Map postIngestStatisticsSqlPlan = new HashMap<>(); @@ -345,6 +349,7 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann .putAllDryRunValidationSqlPlan(dryRunValidationSqlPlan) .postActionsSqlPlan(postActionsSqlPlan) .postCleanupSqlPlan(postCleanupSqlPlan) + .dryRunPostCleanupSqlPlan(dryRunPostCleanupSqlPlan) 
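// How the three dry-run plans built above are consumed at runtime; a sketch that
// mirrors the RelationalIngestorAbstract.performDryRun change later in this patch
// (generics and error handling elided):
//
//     executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan());   // CREATE the ..._validation_... table
//     List results = relationalSink().performDryRun(transformer, executor,
//         generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount());
//     executor.executePhysicalPlan(generatorResult.dryRunPostCleanupSqlPlan());  // DROP the ..._validation_... table
//
// For planners without the DRY_RUN capability, buildLogicalPlanForDryRunPostCleanup
// falls back to the Planner default, which returns an empty logical plan, so the
// cleanup call is a no-op.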
.metadataIngestSqlPlan(metaDataIngestSqlPlan) .deduplicationAndVersioningSqlPlan(deduplicationAndVersioningSqlPlan) .putAllDeduplicationAndVersioningErrorChecksSqlPlan(deduplicationAndVersioningErrorChecksSqlPlan) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 49bd92fff01..cdd75637c07 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -540,8 +540,9 @@ private List performDryRun() if (enrichedIngestMode instanceof BulkLoad) { executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); - executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); - return relationalSink().performDryRun(transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount()); + List results = relationalSink().performDryRun(transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount()); + executor.executePhysicalPlan(generatorResult.dryRunPostCleanupSqlPlan()); + return results; } else { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 40584fdf69a..c36a3f65d1b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -811,6 +811,7 @@ public void testBulkLoadDryRunSuccess() System.out.println(operations.dryRunPreActionsSql()); System.out.println(operations.dryRunSql()); System.out.println(operations.dryRunValidationSql()); + System.out.println(operations.dryRunPostCleanupSql()); // Verify execution using ingestor @@ -883,6 +884,7 @@ public void testBulkLoadDryRunFailure() System.out.println(operations.dryRunPreActionsSql()); System.out.println(operations.dryRunSql()); System.out.println(operations.dryRunValidationSql()); + System.out.println(operations.dryRunPostCleanupSql()); // Verify execution using ingestor diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 9ee2b1195a9..c968b32da0a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -28,6 +28,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -35,6 +36,7 @@ import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.UserDefinedFileFormat; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -46,6 +48,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import static org.finos.legend.engine.persistence.components.common.StatisticName.*; @@ -128,8 +131,10 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); List dryRunSql = operations.dryRunSql(); + Map, String>> dryRunValidationSql = operations.dryRunValidationSql(); List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); + List dryRunPostCleanupSql = operations.dryRunPostCleanupSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER,\"col_integer\" INTEGER,\"batch_id\" INTEGER,\"append_time\" DATETIME)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + @@ -144,19 +149,22 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() String expectedMetadataIngestSql = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + "(SELECT 'my_name',{NEXT_BATCH_ID},'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"task123\",\"file_patterns\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; - String 
expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + + String expectedDryRunPreActionsSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'ABORT_STATEMENT' " + "VALIDATION_MODE = 'RETURN_ERRORS'"; + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"my_db\".\"my_name_validation_lp_yosulf\""; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); Assertions.assertEquals(expectedDryRunSql, dryRunSql.get(0)); + Assertions.assertTrue(dryRunValidationSql.isEmpty()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, dryRunPostCleanupSql.get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); @@ -230,6 +238,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() System.out.println(operations.dryRunPreActionsSql()); System.out.println(operations.dryRunSql()); System.out.println(operations.dryRunValidationSql()); + System.out.println(operations.dryRunPostCleanupSql()); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); @@ -628,18 +637,21 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() "FILE_FORMAT = (FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'SKIP_FILE'"; - String expectedDryRunPreActionsSql = "CREATE TEMPORARY TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + + String expectedDryRunPreActionsSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; String expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'SKIP_FILE' " + "VALIDATION_MODE = 'RETURN_ERRORS'"; + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"my_db\".\"my_name_validation_lp_yosulf\""; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedDryRunPreActionsSql, operations.dryRunPreActionsSql().get(0)); Assertions.assertEquals(expectedDryRunSql, operations.dryRunSql().get(0)); + Assertions.assertTrue(operations.dryRunValidationSql().isEmpty()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); From d8bff938d83464c9380a526dfdcc1ba4567e8493 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Wed, 28 Feb 2024 14:46:37 +0530 Subject: [PATCH 14/32] Rebase with master --- .../components/relational/executor/RelationalExecutor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java index e28bcf7fd4c..76857df5f2f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java @@ -87,8 +87,7 @@ public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, List resultSetList = new ArrayList<>(); for (String sql : physicalPlan.getSqlList()) { - String enrichedSql = SqlUtils.getEnrichedSql(placeholderKeyValues, sql); - SqlUtils.logSql(LOGGER, sqlLogging, sql, enrichedSql, placeholderKeyValues); + SqlUtils.logSql(LOGGER, sqlLogging, sql); List> queryResult = relationalExecutionHelper.executeQuery(sql, rows); if (!queryResult.isEmpty()) { From 8a1e171ccf80d826213e0fe7d322b5bebb3be7d5 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 29 Feb 2024 16:23:48 +0800 Subject: [PATCH 15/32] Implement logic for sample row count + add SQL tests + address some comments --- .../components/planner/BulkLoadPlanner.java | 10 ++- .../components/util/Capability.java | 2 +- .../components/util/LogicalPlanUtils.java | 2 + .../relational/ansi/AnsiSqlSink.java | 60 +++++++++++++ .../components/relational/RelationalSink.java | 32 ------- .../api/RelationalIngestorAbstract.java | 1 + .../components/relational/h2/H2Sink.java | 28 ++++-- .../ingestmode/bulkload/BulkLoadTest.java | 90 ++++++++++++++++--- .../relational/snowflake/SnowflakeSink.java | 26 +++++- .../components/ingestmode/BulkLoadTest.java | 59 +++++++++--- 10 files changed, 242 insertions(+), 68 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 3744ee67fe0..91a64571fa7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -146,6 +146,8 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) } List operations = new ArrayList<>(); + operations.add(Delete.builder().dataset(validationDataset).build()); + if (stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) { Copy copy = Copy.builder() @@ -158,9 +160,6 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources 
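// Note on the dry-run plan above: the Delete on the validation dataset is now added
// unconditionally, ahead of the branch, so both the COPY-with-VALIDATION_MODE path
// and the fallback load (the else branch just below) start from an empty validation table.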
resources) } else { - // As data is actually being loaded in this case, we delete all data from the validation dataset first to ensure we are in a clean slate - operations.add(Delete.builder().dataset(validationDataset).build()); - List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), true); fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); fieldsToSelect.add(MetadataRowNumberField.builder().build()); @@ -201,6 +200,7 @@ public Map, LogicalPlan>> buildLogicalPl .condition(Or.of(fieldsToCheckForNull.stream().map(field -> IsNull.of(FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build())) .collect(Collectors.toList()))) + .limit(options().sampleRowCount()) .build(); validationMap.put(ValidationCategory.NULL_VALUES, @@ -210,7 +210,7 @@ public Map, LogicalPlan>> buildLogicalPl if (!fieldsToCheckForDatatype.isEmpty()) { - if (capabilities.contains(Capability.SAFE_CAST)) + if (capabilities.contains(Capability.TRY_CAST)) { Selection queryForDatatype = getSelectColumnsWithTryCast(validationDataset, fieldsToCheckForDatatype); validationMap.put(ValidationCategory.DATATYPE_CONVERSION, @@ -242,6 +242,7 @@ private Selection getSelectColumnsWithTryCast(Dataset dataset, List field .addConditions(IsNull.of(TryCastFunction.builder().field(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build()).type(field.type()).build())) .build()) .collect(Collectors.toList()))) + .limit(options().sampleRowCount()) .build(); } @@ -254,6 +255,7 @@ private Selection getSelectColumnsWithCast(Dataset dataset, Field fieldToCheckFo .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build()))) .addConditions(IsNull.of(CastFunction.builder().field(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build()).type(fieldToCheckForDatatype.type()).build())) .build()) + .limit(options().sampleRowCount()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java index 9bd5c256417..a40ef45b345 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java @@ -24,5 +24,5 @@ public enum Capability DATA_TYPE_SCALE_CHANGE, TRANSFORM_WHILE_COPY, DRY_RUN, - SAFE_CAST + TRY_CAST } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index c815f0f1fa2..49f476a3c1f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -396,6 +396,7 @@ public static List findCommonPrimaryFieldsBetweenMainAndStaging(Dataset m return stagingDataset.schema().fields().stream().filter(field -> field.primaryKey() && primaryKeysFromMain.contains(field.name())).collect(Collectors.toList()); } + // TODO: another method: public static List extractStagedFilesFieldValuesWithVarCharType(Dataset dataset) public static List extractStagedFilesFieldValues(Dataset dataset, boolean withVarCharType) { List stagedFilesFields = new ArrayList<>(); @@ -403,6 +404,7 @@ public static List extractStagedFilesFieldValues(Dataset dataset, boolean int iter = 1; for (Field field : dataset.schema().fields()) { + // TODO: extract this part into a private method that takes the desired type StagedFilesFieldValue fieldValue = StagedFilesFieldValue.builder() .columnNumber(columnNumbersPresent ? field.columnNumber().get() : iter++) .datasetRefAlias(dataset.datasetReference().alias()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 54f78f840c1..fe1827ec837 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -160,6 +160,7 @@ import org.finos.legend.engine.persistence.components.util.ValidationCategory; import java.util.*; +import java.util.stream.Collectors; public class AnsiSqlSink extends RelationalSink { @@ -333,4 +334,63 @@ public List performDryRun(Transformer transformer, E { throw new UnsupportedOperationException("DryRun not supported!"); } + + protected Optional getString(Map row, String key) + { + Object value = row.get(key); + String strValue = value == null ? null : (String) value; + return Optional.ofNullable(strValue); + } + + protected Optional getLong(Map row, String key) + { + Object value = row.get(key); + Long longValue = value == null ? 
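// Illustrative walk-through of getDataErrorsWithEqualDistributionAcrossCategories
// below (assuming the enum declares NULL_VALUES before DATATYPE_CONVERSION):
// with sampleRowCount = 3, a NULL_VALUES queue of [n1, n2] and a DATATYPE_CONVERSION
// queue of [d1, d2], the round-robin drain returns [n1, d1, n2], so every category
// contributes an error before any category contributes a second one.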
null : (Long) value; + return Optional.ofNullable(longValue); + } + + protected DataError constructDataError(Map row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) + { + // TODO: follow the order of schema object + String commaSeparatedRow = row.keySet().stream() + .sorted() + .filter(key -> !key.equals(fileNameColumnName) && !key.equals(rowNumberColumnName)) + .map(key -> getString(row, key).orElse("")) + .collect(Collectors.joining(",")); + + return DataError.builder() + .errorMessage(validationCategory.getValidationFailedErrorMessage()) + .file(getString(row, fileNameColumnName).orElseThrow(IllegalStateException::new)) + .errorCategory(validationCategory.name()) + .columnName(validatedColumnName) + .rowNumber(getLong(row, rowNumberColumnName)) + .rejectedRecord(commaSeparatedRow) + .build(); + } + + protected List getDataErrorsWithEqualDistributionAcrossCategories(int sampleRowCount, Map> dataErrorsByCategory) + { + List dataErrors = new ArrayList<>(); + Set exhaustedCategories = new HashSet<>(); + + while (dataErrors.size() < sampleRowCount && exhaustedCategories.size() != ValidationCategory.values().length) + { + for (ValidationCategory validationCategory : ValidationCategory.values()) + { + if (!dataErrorsByCategory.get(validationCategory).isEmpty()) + { + if (dataErrors.size() < sampleRowCount) + { + dataErrors.add(dataErrorsByCategory.get(validationCategory).poll()); + } + } + else + { + exhaustedCategories.add(validationCategory); + } + } + } + + return dataErrors; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index fc790cd6cea..09991870237 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -195,36 +195,4 @@ public interface ConstructDatasetFromDatabase public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues); public abstract List performDryRun(Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount); - - protected Optional getString(Map row, String key) - { - Object value = row.get(key); - String strValue = value == null ? null : (String) value; - return Optional.ofNullable(strValue); - } - - protected Optional getLong(Map row, String key) - { - Object value = row.get(key); - Long longValue = value == null ? 
null : (Long) value; - return Optional.ofNullable(longValue); - } - - protected DataError constructDataError(Map row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) - { - String commaSeparatedRow = row.keySet().stream() - .sorted() - .filter(key -> !key.equals(fileNameColumnName) && !key.equals(rowNumberColumnName)) - .map(key -> getString(row, key).orElse("")) - .collect(Collectors.joining(",")); - - return DataError.builder() - .errorMessage(validationCategory.getValidationFailedErrorMessage()) - .file(getString(row, fileNameColumnName).orElseThrow(IllegalStateException::new)) - .errorCategory(validationCategory.name()) - .columnName(validatedColumnName) - .rowNumber(getLong(row, rowNumberColumnName)) - .rejectedRecord(commaSeparatedRow) - .build(); - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index cdd75637c07..8a8c1c2b7ac 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -540,6 +540,7 @@ private List performDryRun() if (enrichedIngestMode instanceof BulkLoad) { executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); + // TODO: pass enrichedDatasets.stagingDataset() to the following method List results = relationalSink().performDryRun(transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount()); executor.executePhysicalPlan(generatorResult.dryRunPostCleanupSqlPlan()); return results; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 1f52aa8add5..626b9a136fb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.h2; import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import java.util.Optional; @@ -89,6 +90,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Queue; import 
java.util.Set; import java.util.stream.Collectors; @@ -265,6 +267,11 @@ public List performDryRun(Transformer transformer, E executor.executePhysicalPlan(dryRunSqlPlan); List dataErrors = new ArrayList<>(); + Map> dataErrorsByCategory = new HashMap<>(); + for (ValidationCategory validationCategory : ValidationCategory.values()) + { + dataErrorsByCategory.put(validationCategory, new LinkedList<>()); + } Map, SqlPlan> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUES, new HashMap<>()); Map, SqlPlan> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(DATATYPE_CONVERSION, new HashMap<>()); @@ -272,7 +279,7 @@ public List performDryRun(Transformer transformer, E // Execute queries for null values for (Set validatedColumns : queriesForNull.keySet()) { - List results = executor.executePhysicalPlanAndGetResults(queriesForNull.get(validatedColumns), sampleRowCount); + List results = executor.executePhysicalPlanAndGetResults(queriesForNull.get(validatedColumns)); if (!results.isEmpty()) { List> resultSets = results.get(0).getData(); @@ -282,7 +289,9 @@ public List performDryRun(Transformer transformer, E { if (row.get(column) == null) { - dataErrors.add(constructDataError(row, FILE, ROW_NUMBER, NULL_VALUES, column)); + DataError dataError = constructDataError(row, FILE, ROW_NUMBER, NULL_VALUES, column); + dataErrors.add(dataError); + dataErrorsByCategory.get(NULL_VALUES).add(dataError); } } } @@ -294,7 +303,7 @@ public List performDryRun(Transformer transformer, E { try { - executor.executePhysicalPlanAndGetResults(queriesForDatatype.get(validatedColumns), sampleRowCount); + executor.executePhysicalPlanAndGetResults(queriesForDatatype.get(validatedColumns)); } catch (RuntimeException e) { @@ -313,7 +322,9 @@ public List performDryRun(Transformer transformer, E List> resultSets = results.get(0).getData(); for (Map row : resultSets) { - dataErrors.add(constructDataError(row, FILE, ROW_NUMBER, DATATYPE_CONVERSION, validatedColumn.fieldName())); + DataError dataError = constructDataError(row, FILE, ROW_NUMBER, DATATYPE_CONVERSION, validatedColumn.fieldName()); + dataErrors.add(dataError); + dataErrorsByCategory.get(DATATYPE_CONVERSION).add(dataError); } } } @@ -321,6 +332,13 @@ public List performDryRun(Transformer transformer, E } } - return dataErrors; + if (dataErrors.size() <= sampleRowCount) + { + return dataErrors; + } + else + { + return getDataErrorsWithEqualDistributionAcrossCategories(sampleRowCount, dataErrorsByCategory); + } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index c36a3f65d1b..5dc498e94bc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -78,6 +78,7 @@ public class BulkLoadTest extends BaseTest private static final String COL_STRING = 
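// The explicit row cap argument to executePhysicalPlanAndGetResults above is gone:
// each validation query now carries LIMIT sampleRowCount from the planner, and the
// final error list is trimmed afterwards by the category-balanced sampler.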
"col_string"; private static final String COL_DECIMAL = "col_decimal"; private static final String COL_DATETIME = "col_datetime"; + private static final String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; private static Field col1 = Field.builder() .name(COL_INT) @@ -786,6 +787,7 @@ public void testBulkLoadDryRunSuccess() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -807,11 +809,41 @@ public void testBulkLoadDryRunSuccess() Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); - // TODO: convert these to assertions - System.out.println(operations.dryRunPreActionsSql()); - System.out.println(operations.dryRunSql()); - System.out.println(operations.dryRunValidationSql()); - System.out.println(operations.dryRunPostCleanupSql()); + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\"" + + "(\"col_int\" VARCHAR,\"col_string\" VARCHAR,\"col_decimal\" VARCHAR,\"col_datetime\" VARCHAR,\"FILE\" VARCHAR,\"ROW_NUMBER\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"FILE\", \"ROW_NUMBER\") " + + "SELECT CONVERT(\"col_int\",VARCHAR),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",VARCHAR),CONVERT(\"col_datetime\",VARCHAR)," + + "'src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_decimal\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_decimal\" AS DECIMAL(5,2)) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM 
\"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertNull(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql1)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql2)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql3)); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); // Verify execution using ingestor @@ -859,6 +891,7 @@ public void testBulkLoadDryRunFailure() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -880,11 +913,46 @@ public void testBulkLoadDryRunFailure() Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); - // TODO: convert these to assertions - System.out.println(operations.dryRunPreActionsSql()); - System.out.println(operations.dryRunSql()); - System.out.println(operations.dryRunValidationSql()); - System.out.println(operations.dryRunPostCleanupSql()); + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\"" + + "(\"col_int\" VARCHAR,\"col_string\" VARCHAR,\"col_decimal\" VARCHAR,\"col_datetime\" VARCHAR,\"FILE\" VARCHAR,\"ROW_NUMBER\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"FILE\", \"ROW_NUMBER\") " + + "SELECT CONVERT(\"col_int\",VARCHAR),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",VARCHAR),CONVERT(\"col_datetime\",VARCHAR)," + + "'src/test/resources/data/bulk-load/input/bad_file.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + String expectedDryRunNullValidationSql = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM 
\"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (main_validation_lp_yosulf.\"col_string\" IS NULL) OR (main_validation_lp_yosulf.\"col_decimal\" IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_decimal\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_decimal\" AS DECIMAL(5,2)) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).containsValue(expectedDryRunNullValidationSql)); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).keySet().size()); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql1)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql2)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql3)); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); // Verify execution using ingestor @@ -936,6 +1004,8 @@ public void testBulkLoadDryRunFailure() Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); } + // TODO: add test for sample row count + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion 
caseConversion, Optional eventId) { return RelationalIngestor.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 2ad57571f21..acca6cf9ecc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -92,10 +92,12 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Properties; +import java.util.Queue; import java.util.Set; import java.util.ArrayList; import java.util.stream.Collectors; @@ -142,7 +144,7 @@ public class SnowflakeSink extends AnsiSqlSink capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.TRANSFORM_WHILE_COPY); capabilities.add(Capability.DRY_RUN); - capabilities.add(Capability.SAFE_CAST); + capabilities.add(Capability.TRY_CAST); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); @@ -304,11 +306,17 @@ private List performDryRunWithValidationQueries(Executor dataErrors = new ArrayList<>(); + Map> dataErrorsByCategory = new HashMap<>(); + for (ValidationCategory validationCategory : ValidationCategory.values()) + { + dataErrorsByCategory.put(validationCategory, new LinkedList<>()); + } + for (ValidationCategory validationCategory : dryRunValidationSqlPlan.keySet()) { for (Set validatedColumns : dryRunValidationSqlPlan.get(validationCategory).keySet()) { - List results = executor.executePhysicalPlanAndGetResults(dryRunValidationSqlPlan.get(validationCategory).get(validatedColumns), sampleRowCount); + List results = executor.executePhysicalPlanAndGetResults(dryRunValidationSqlPlan.get(validationCategory).get(validatedColumns)); if (!results.isEmpty()) { List> resultSets = results.get(0).getData(); @@ -318,14 +326,24 @@ private List performDryRunWithValidationQueries(Executor metaIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT NOT NULL,\"col_variant\" VARIANT,\"batch_id\" INTEGER)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT NOT NULL,\"col_variant\" VARIANT NOT NULL,\"batch_id\" INTEGER)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + "(\"col_bigint\", \"col_variant\", \"batch_id\") " + "FROM " + @@ -234,16 +243,40 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetaIngestSql, metaIngestSql.get(0)); - // TODO: convert these to 
assertions - System.out.println(operations.dryRunPreActionsSql()); - System.out.println(operations.dryRunSql()); - System.out.println(operations.dryRunValidationSql()); - System.out.println(operations.dryRunPostCleanupSql()); - Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); Assertions.assertNull(statsSql.get(ROWS_TERMINATED)); Assertions.assertNull(statsSql.get(ROWS_UPDATED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + + "(\"col_bigint\" VARCHAR,\"col_variant\" VARCHAR,\"FILE\" VARCHAR,\"ROW_NUMBER\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" (\"col_bigint\", \"col_variant\", \"FILE\", \"ROW_NUMBER\") " + + "FROM (SELECT t.$4 as \"col_bigint\",t.$5 as \"col_variant\",METADATA$FILENAME,METADATA$FILE_ROW_NUMBER FROM my_location as t) " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') FILE_FORMAT = (TYPE = 'AVRO') ON_ERROR = 'ABORT_STATEMENT'"; + + String expectedDryRunNullValidationSql = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"FILE\",my_name_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf " + + "WHERE (my_name_validation_lp_yosulf.\"col_bigint\" IS NULL) OR (my_name_validation_lp_yosulf.\"col_variant\" IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"FILE\",my_name_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf WHERE " + + "((NOT (my_name_validation_lp_yosulf.\"col_bigint\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_bigint\" AS BIGINT) IS NULL)) " + + "OR ((NOT (my_name_validation_lp_yosulf.\"col_variant\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_variant\" AS VARIANT) IS NULL)) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"my_db\".\"my_name_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).containsValue(expectedDryRunNullValidationSql)); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).keySet().size()); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql)); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); } @Test @@ -639,7 +672,8 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() String expectedDryRunPreActionsSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; - String 
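// The single expectedDryRunSql below is split into a Delete/Load pair: dry-run now
// issues a DELETE on the validation table before the COPY with VALIDATION_MODE,
// matching the unconditional Delete added to BulkLoadPlanner.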
expectedDryRunSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + + String expectedDryRunDeleteSql = "DELETE FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf"; + String expectedDryRunLoadSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'SKIP_FILE' " + @@ -649,7 +683,8 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedDryRunPreActionsSql, operations.dryRunPreActionsSql().get(0)); - Assertions.assertEquals(expectedDryRunSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSql, operations.dryRunSql().get(1)); Assertions.assertTrue(operations.dryRunValidationSql().isEmpty()); Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); From 989aa2c073a1f9364b717380339a2867ca3f2599 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 29 Feb 2024 17:13:07 +0800 Subject: [PATCH 16/32] Address comments --- .../components/planner/BulkLoadPlanner.java | 4 +- .../components/util/LogicalPlanUtils.java | 38 +++++++++++++------ .../relational/ansi/AnsiSqlSink.java | 13 ++----- .../components/relational/RelationalSink.java | 2 +- .../api/RelationalIngestorAbstract.java | 3 +- .../components/relational/h2/H2Sink.java | 8 ++-- .../visitor/StagedFilesDatasetVisitor.java | 2 +- .../ingestmode/bulkload/BulkLoadTest.java | 12 +++--- .../relational/snowflake/SnowflakeSink.java | 10 +++-- .../visitor/StagedFilesDatasetVisitor.java | 2 +- 10 files changed, 51 insertions(+), 43 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 91a64571fa7..d6e99ccd683 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -160,7 +160,7 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) } else { - List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), true); + List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValuesWithVarCharType(stagingDataset()); fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); fieldsToSelect.add(MetadataRowNumberField.builder().build()); @@ -261,7 +261,7 @@ private Selection getSelectColumnsWithCast(Dataset dataset, Field fieldToCheckFo private LogicalPlan 
buildLogicalPlanForTransformWhileCopy(Resources resources) { - List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset(), false); + List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); // Add digest diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index 49f476a3c1f..da880eee09c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -396,28 +396,42 @@ public static List findCommonPrimaryFieldsBetweenMainAndStaging(Dataset m return stagingDataset.schema().fields().stream().filter(field -> field.primaryKey() && primaryKeysFromMain.contains(field.name())).collect(Collectors.toList()); } - // TODO: another method: public static List extractStagedFilesFieldValuesWithVarCharType(Dataset dataset) - public static List extractStagedFilesFieldValues(Dataset dataset, boolean withVarCharType) + public static List extractStagedFilesFieldValues(Dataset dataset) { List stagedFilesFields = new ArrayList<>(); boolean columnNumbersPresent = dataset.schema().fields().stream().allMatch(field -> field.columnNumber().isPresent()); int iter = 1; for (Field field : dataset.schema().fields()) { - // TODO: extract this part into a private method that takes the desired type - StagedFilesFieldValue fieldValue = StagedFilesFieldValue.builder() - .columnNumber(columnNumbersPresent ? field.columnNumber().get() : iter++) - .datasetRefAlias(dataset.datasetReference().alias()) - .alias(field.fieldAlias().isPresent() ? field.fieldAlias().get() : field.name()) - .elementPath(field.elementPath()) - .fieldType(withVarCharType ? FieldType.builder().dataType(VARCHAR).build() : field.type()) - .fieldName(field.name()) - .build(); - stagedFilesFields.add(fieldValue); + stagedFilesFields.add(getStagedFilesFieldValueWithType(dataset, field, field.type(), columnNumbersPresent, iter++)); } return stagedFilesFields; } + public static List extractStagedFilesFieldValuesWithVarCharType(Dataset dataset) + { + List stagedFilesFields = new ArrayList<>(); + boolean columnNumbersPresent = dataset.schema().fields().stream().allMatch(field -> field.columnNumber().isPresent()); + int iter = 1; + for (Field field : dataset.schema().fields()) + { + stagedFilesFields.add(getStagedFilesFieldValueWithType(dataset, field, FieldType.builder().dataType(VARCHAR).build(), columnNumbersPresent, iter++)); + } + return stagedFilesFields; + } + + public static StagedFilesFieldValue getStagedFilesFieldValueWithType(Dataset dataset, Field field, FieldType fieldType, boolean columnNumbersPresent, int counter) + { + return StagedFilesFieldValue.builder() + .columnNumber(columnNumbersPresent ? 
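// Explicit column numbers are used only when every field in the schema declares one
// (columnNumbersPresent); otherwise the caller-supplied counter assigns sequential,
// 1-based positions. The VarChar variant above calls this helper with a forced
// VARCHAR type so raw staged values land as strings that the dry-run validation
// queries can cast-check.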
field.columnNumber().get() : counter) + .datasetRefAlias(dataset.datasetReference().alias()) + .alias(field.fieldAlias().isPresent() ? field.fieldAlias().get() : field.name()) + .elementPath(field.elementPath()) + .fieldType(fieldType) + .fieldName(field.name()) + .build(); + } + public static Dataset getTempDataset(Datasets datasets) { String mainDatasetName = datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index fe1827ec837..11846f0d971 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -330,7 +330,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor performDryRun(Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) { throw new UnsupportedOperationException("DryRun not supported!"); } @@ -349,22 +349,15 @@ protected Optional getLong(Map row, String key) return Optional.ofNullable(longValue); } - protected DataError constructDataError(Map row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) + protected DataError constructDataError(List allColumns, Map row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) { - // TODO: follow the order of schema object - String commaSeparatedRow = row.keySet().stream() - .sorted() - .filter(key -> !key.equals(fileNameColumnName) && !key.equals(rowNumberColumnName)) - .map(key -> getString(row, key).orElse("")) - .collect(Collectors.joining(",")); - return DataError.builder() .errorMessage(validationCategory.getValidationFailedErrorMessage()) .file(getString(row, fileNameColumnName).orElseThrow(IllegalStateException::new)) .errorCategory(validationCategory.name()) .columnName(validatedColumnName) .rowNumber(getLong(row, rowNumberColumnName)) - .rejectedRecord(commaSeparatedRow) + .rejectedRecord(allColumns.stream().map(column -> getString(row, column).orElse("")).collect(Collectors.joining(","))) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index 
09991870237..4e9b4f8cbeb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -194,5 +194,5 @@ public interface ConstructDatasetFromDatabase public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues); - public abstract List performDryRun(Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount); + public abstract List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 8a8c1c2b7ac..394b0aac330 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -540,8 +540,7 @@ private List performDryRun() if (enrichedIngestMode instanceof BulkLoad) { executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); - // TODO: pass enrichedDatasets.stagingDataset() to the following method - List results = relationalSink().performDryRun(transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount()); + List results = relationalSink().performDryRun(enrichedDatasets, transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount()); executor.executePhysicalPlan(generatorResult.dryRunPostCleanupSqlPlan()); return results; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 626b9a136fb..3644a01ab5a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -262,7 +262,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor performDryRun(Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) { executor.executePhysicalPlan(dryRunSqlPlan); @@ -273,6 +273,8 @@ public List performDryRun(Transformer transformer, E dataErrorsByCategory.put(validationCategory, new LinkedList<>()); } + List allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); + Map, SqlPlan> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUES, new HashMap<>()); Map, SqlPlan> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(DATATYPE_CONVERSION, new HashMap<>()); @@ -289,7 +291,7 @@ public List performDryRun(Transformer transformer, E { if (row.get(column) == null) { - DataError dataError = constructDataError(row, FILE, ROW_NUMBER, NULL_VALUES, column); + DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, NULL_VALUES, column); dataErrors.add(dataError); dataErrorsByCategory.get(NULL_VALUES).add(dataError); } @@ -322,7 +324,7 @@ public List performDryRun(Transformer transformer, E List> resultSets = results.get(0).getData(); for (Map row : resultSets) { - DataError dataError = constructDataError(row, FILE, ROW_NUMBER, DATATYPE_CONVERSION, validatedColumn.fieldName()); + DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, DATATYPE_CONVERSION, validatedColumn.fieldName()); dataErrors.add(dataError); dataErrorsByCategory.get(DATATYPE_CONVERSION).add(dataError); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java index 810b9028b6a..068721db03f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java @@ -29,7 +29,7 @@ public class StagedFilesDatasetVisitor implements LogicalPlanVisitor allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current, false); + List allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); StagedFilesSelection selection = StagedFilesSelection.builder() .source(current) .addAllFields(allColumns) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 5dc498e94bc..befd8723dc1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -968,35 +968,35 @@ public void testBulkLoadDryRunFailure() .errorCategory(ValidationCategory.NULL_VALUES.name()) .rowNumber(1L) .columnName(col3NonNullable.name()) - .rejectedRecord("2022-01-99 00:00:00.0,,??,Andy") + .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.NULL_VALUES.name()) .rowNumber(2L) .columnName(col2NonNullable.name()) - .rejectedRecord("2022-01-12 00:00:00.0,NaN,2,") + .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) .rowNumber(1L) .columnName(col1.name()) - .rejectedRecord("2022-01-99 00:00:00.0,,??,Andy") + .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) .rowNumber(1L) .columnName(col4.name()) - .rejectedRecord("2022-01-99 00:00:00.0,,??,Andy") + .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) .rowNumber(2L) .columnName(col3.name()) - .rejectedRecord("2022-01-12 00:00:00.0,NaN,2,") + .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Unable to type cast column") .build()); @@ -1004,8 +1004,6 @@ public void testBulkLoadDryRunFailure() Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); } - // TODO: add test for sample row count - RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, Optional eventId) { return RelationalIngestor.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index acca6cf9ecc..05fe2de1c45 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -262,7 +262,7 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } } - public List performDryRun(Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) { if (dryRunValidationSqlPlan == null || dryRunValidationSqlPlan.isEmpty()) { @@ -270,7 +270,7 @@ public List performDryRun(Transformer transformer, E } else { - return performDryRunWithValidationQueries(executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount); + return performDryRunWithValidationQueries(datasets, executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount); } } @@ -301,7 +301,7 @@ private List performDryRunWithValidationMode(Executor performDryRunWithValidationQueries(Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + private List performDryRunWithValidationQueries(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) { executor.executePhysicalPlan(dryRunSqlPlan); @@ -312,6 +312,8 @@ private List performDryRunWithValidationQueries(Executor()); } + List allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); + for (ValidationCategory validationCategory : dryRunValidationSqlPlan.keySet()) { for (Set validatedColumns : dryRunValidationSqlPlan.get(validationCategory).keySet()) @@ -326,7 +328,7 @@ private List performDryRunWithValidationQueries(Executor allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current, false); + List allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); StagedFilesSelection selection = StagedFilesSelection.builder() .source(current) .addAllFields(allColumns) From e36cf4b8a36cfa57807ca0e5d23ee815af42d285 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Fri, 1 Mar 2024 13:23:24 +0800 Subject: [PATCH 17/32] Add sorting for h2 datatype checks and add test for sample row count and upper case --- .../components/relational/h2/H2Sink.java | 15 +- .../ingestmode/bulkload/BulkLoadTest.java | 147 ++++++++++++++++++ 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 3644a01ab5a..b66d1d9d999 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -301,7 +301,20 @@ public List performDryRun(Datasets datasets, Transformer validatedColumns : queriesForDatatype.keySet()) + // Sort the map keys first to achieve deterministic results + List> sortedMapKeysForDatatype = queriesForDatatype.keySet().stream().sorted((o1, o2) -> + { + // There is only one element in the set + Optional fieldValue1 = o1.stream().findAny(); + Optional fieldValue2 = o2.stream().findAny(); + if (fieldValue1.isPresent() && fieldValue2.isPresent()) + { + return fieldValue1.get().fieldName().compareTo(fieldValue2.get().fieldName()); + } + return 0; + }).collect(Collectors.toList()); + + for (Set validatedColumns : sortedMapKeysForDatatype) { try { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index befd8723dc1..69e0385ef38 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -1004,6 +1004,153 @@ public void testBulkLoadDryRunFailure() Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); } + @Test + public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() + { + String filePath = "src/test/resources/data/bulk-load/input/bad_file.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2NonNullable, col3NonNullable, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) + .sampleRowCount(3) + .caseConversion(CaseConversion.TO_UPPER) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql 
= operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"MAIN\"" + + "(\"COL_INT\" INTEGER,\"COL_STRING\" VARCHAR NOT NULL,\"COL_DECIMAL\" DECIMAL(5,2) NOT NULL,\"COL_DATETIME\" TIMESTAMP,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN\" " + + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"BATCH_ID\", \"APPEND_TIME\") " + + "SELECT CONVERT(\"COL_INT\",INTEGER),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",DECIMAL(5,2)),CONVERT(\"COL_DATETIME\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as my_alias WHERE my_alias.\"BATCH_ID\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\"" + + "(\"COL_INT\" VARCHAR,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" VARCHAR,\"COL_DATETIME\" VARCHAR,\"FILE\" VARCHAR,\"ROW_NUMBER\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" " + + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"FILE\", \"ROW_NUMBER\") " + + "SELECT CONVERT(\"COL_INT\",VARCHAR),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",VARCHAR),CONVERT(\"COL_DATETIME\",VARCHAR),'src/test/resources/data/bulk-load/input/bad_file.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; + + String expectedDryRunNullValidationSql = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (MAIN_validation_lp_yosulf.\"COL_STRING\" IS NULL) OR (MAIN_validation_lp_yosulf.\"COL_DECIMAL\" IS NULL) LIMIT 3"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_INT\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_INT\" AS INTEGER) IS NULL) LIMIT 3"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM 
\"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_DECIMAL\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_DECIMAL\" AS DECIMAL(5,2)) IS NULL) LIMIT 3"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_DATETIME\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_DATETIME\" AS TIMESTAMP) IS NULL) LIMIT 3"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).containsValue(expectedDryRunNullValidationSql)); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).keySet().size()); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql1)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql2)); + Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql3)); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER, 3); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + List expectedErrorRecords = Arrays.asList(DataError.builder() + .file(filePath) + .errorCategory(ValidationCategory.NULL_VALUES.name()) + .rowNumber(1L) + .columnName(col3NonNullable.name().toUpperCase()) + .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorMessage("Null values found in non-nullable column") + .build(), DataError.builder() + .file(filePath) + .errorCategory(ValidationCategory.NULL_VALUES.name()) + .rowNumber(2L) + .columnName(col2NonNullable.name().toUpperCase()) + .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") + .errorMessage("Null values found in non-nullable column") + .build(), DataError.builder() + .file(filePath) + .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .rowNumber(1L) + .columnName(col4.name().toUpperCase()) + .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorMessage("Unable to type cast column") + .build()); + + Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); + Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new 
HashSet<>(dryRunResult.errorRecords())); + } + + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, int sampleRowCount) + { + return RelationalIngestor.builder() + .ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .executionTimestampClock(executionTimestampClock) + .cleanupStagingData(options.cleanupStagingData()) + .collectStatistics(options.collectStatistics()) + .enableConcurrentSafety(true) + .caseConversion(caseConversion) + .sampleRowCount(sampleRowCount) + .build(); + } + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, Optional eventId) { return RelationalIngestor.builder() From e0772511e7db77180a28cebadd3fbf635ba2f5a7 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 4 Mar 2024 10:59:01 +0800 Subject: [PATCH 18/32] Address comments --- .../values/CastFunctionAbstract.java | 3 + .../values/TryCastFunctionAbstract.java | 3 + .../components/planner/BulkLoadPlanner.java | 62 ++++++++- .../relational/ansi/AnsiSqlSink.java | 24 ++-- .../util/DataErrorFairDistributionTest.java | 118 ++++++++++++++++++ .../sqldom/schemaops/WindowFunctionTest.java | 8 ++ .../components/relational/h2/H2Sink.java | 13 +- .../relational/snowflake/SnowflakeSink.java | 11 +- 8 files changed, 206 insertions(+), 36 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java index 6e7677776e8..9392c993c96 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java @@ -17,6 +17,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Parameter; import static org.immutables.value.Value.Style; @Immutable @@ -29,7 +30,9 @@ ) public interface CastFunctionAbstract extends Value { + @Parameter(order = 0) Value field(); + @Parameter(order = 1) FieldType type(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java index 
f64d0bbdd8a..11222de8584 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java @@ -17,6 +17,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Parameter; import static org.immutables.value.Value.Style; @Immutable @@ -29,7 +30,9 @@ ) public interface TryCastFunctionAbstract extends Value { + @Parameter(order = 0) Value field(); + @Parameter(order = 1) FieldType type(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index d6e99ccd683..0305ee41f80 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -105,7 +105,7 @@ class BulkLoadPlanner extends Planner if (capabilities.contains(Capability.DRY_RUN)) { - validationDataset = stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported() ? getValidationDataset() : getValidationDatasetWithMetaColumns(); + validationDataset = stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported() ? getValidationModeDataset() : getGenericValidationDataset(); } } @@ -137,6 +137,23 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) } } + /* + ------------------ + Validation Mode Logic: + ------------------ + COPY INTO temp_table (data_columns) + SELECT data_columns from staging + WITH VALIDATION_MODE = true + + ------------------ + Generic Approach Logic: + ------------------ + modified_data_columns: nullable data_columns with String datatype + meta_columns: file_name, row_number + + COPY INTO temp_table (modified_data_columns, meta_columns) + SELECT modified_data_columns, meta_columns from staging + */ @Override public LogicalPlan buildLogicalPlanForDryRun(Resources resources) { @@ -182,6 +199,41 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) return LogicalPlan.of(operations); } + /* + ------------------ + Validation Mode Logic: + ------------------ + NOT APPLICABLE + + ------------------ + Generic Approach with TRY_CAST Logic: + ------------------ + For null values: + SELECT * FROM temp_table WHERE + (non_nullable_data_column_1 = NULL + OR non_nullable_data_column_2 = NULL + OR ...) 
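For intuition, a minimal, self-contained sketch of the SQL shape produced by the null-value check described in the comment above (column names are borrowed from the H2 test earlier in this series; note the generated SQL uses IS NULL, the "= NULL" in the comment is pseudocode shorthand):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class NullCheckSketch
{
    public static void main(String[] args)
    {
        // Hypothetical staging columns declared NOT NULL
        List<String> nonNullableColumns = Arrays.asList("COL_STRING", "COL_DECIMAL");

        // Each non-nullable column gets an IS NULL predicate; the predicates
        // are OR-joined so one query finds violations in any of the columns
        String predicate = nonNullableColumns.stream()
                .map(column -> "(" + column + " IS NULL)")
                .collect(Collectors.joining(" OR "));

        // Prints: SELECT * FROM temp_table WHERE (COL_STRING IS NULL) OR (COL_DECIMAL IS NULL) LIMIT 3
        System.out.println("SELECT * FROM temp_table WHERE " + predicate + " LIMIT 3");
    }
}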
+ + For datatype conversion: + SELECT * FROM temp_table WHERE + ((non_string_data_column_1 != NULL AND TRY_CAST(non_string_data_column_1 AS datatype) = NULL) + OR (non_string_data_column_2 != NULL AND TRY_CAST(non_string_data_column_2 AS datatype) = NULL) + OR ...) + + ------------------ + Generic Approach with CAST Logic: + ------------------ + For null values: + SELECT * FROM temp_table WHERE + (non_nullable_data_column_1 = NULL + OR non_nullable_data_column_2 = NULL + OR ...) + + For datatype conversion: + SELECT * FROM temp_table WHERE (non_string_data_column_1 != NULL AND CAST(non_string_data_column_1 AS datatype) = NULL) + SELECT * FROM temp_table WHERE (non_string_data_column_2 != NULL AND CAST(non_string_data_column_2 AS datatype) = NULL) + ... + */ @Override public Map, LogicalPlan>> buildLogicalPlanForDryRunValidation(Resources resources) { @@ -239,7 +291,7 @@ private Selection getSelectColumnsWithTryCast(Dataset dataset, List field .source(dataset) .condition(Or.of(fieldsToCheckForDatatype.stream().map(field -> And.builder() .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build()))) - .addConditions(IsNull.of(TryCastFunction.builder().field(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build()).type(field.type()).build())) + .addConditions(IsNull.of(TryCastFunction.of(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build(), field.type()))) .build()) .collect(Collectors.toList()))) .limit(options().sampleRowCount()) @@ -253,7 +305,7 @@ private Selection getSelectColumnsWithCast(Dataset dataset, Field fieldToCheckFo .source(dataset) .condition(And.builder() .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build()))) - .addConditions(IsNull.of(CastFunction.builder().field(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build()).type(fieldToCheckForDatatype.type()).build())) + .addConditions(IsNull.of(CastFunction.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build(), fieldToCheckForDatatype.type()))) .build()) .limit(options().sampleRowCount()) .build(); @@ -424,7 +476,7 @@ protected void addPostRunStatsForRowsDeleted(Map pos // Not supported at the moment } - private Dataset getValidationDataset() + private Dataset getValidationModeDataset() { String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); String validationDatasetName = TableNameGenUtils.generateTableName(tableName, "validation", options().ingestRunId()); @@ -436,7 +488,7 @@ private Dataset getValidationDataset() .build(); } - private Dataset getValidationDatasetWithMetaColumns() + private Dataset getGenericValidationDataset() { String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); String validationDatasetName = TableNameGenUtils.generateTableName(tableName, "validation", options().ingestRunId()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 11846f0d971..4b0176aad56 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -361,29 +361,35 @@ protected DataError constructDataError(List allColumns, Map getDataErrorsWithEqualDistributionAcrossCategories(int sampleRowCount, Map> dataErrorsByCategory) + public List getDataErrorsWithFairDistributionAcrossCategories(int sampleRowCount, Map> dataErrorsByCategory) { - List dataErrors = new ArrayList<>(); - Set exhaustedCategories = new HashSet<>(); + List totalErrors = dataErrorsByCategory.values().stream().flatMap(Collection::stream).collect(Collectors.toList()); + if (totalErrors.size() <= sampleRowCount) + { + return totalErrors; + } + + List fairlyDistributedDataErrors = new ArrayList<>(); + List eligibleCategories = new ArrayList<>(Arrays.asList(ValidationCategory.values())); - while (dataErrors.size() < sampleRowCount && exhaustedCategories.size() != ValidationCategory.values().length) + while (fairlyDistributedDataErrors.size() < sampleRowCount && !eligibleCategories.isEmpty()) { - for (ValidationCategory validationCategory : ValidationCategory.values()) + for (ValidationCategory validationCategory : eligibleCategories) { if (!dataErrorsByCategory.get(validationCategory).isEmpty()) { - if (dataErrors.size() < sampleRowCount) + if (fairlyDistributedDataErrors.size() < sampleRowCount) { - dataErrors.add(dataErrorsByCategory.get(validationCategory).poll()); + fairlyDistributedDataErrors.add(dataErrorsByCategory.get(validationCategory).poll()); } } else { - exhaustedCategories.add(validationCategory); + eligibleCategories.remove(validationCategory); } } } - return dataErrors; + return fairlyDistributedDataErrors; } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java new file mode 100644 index 00000000000..3ca9ec1289a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -0,0 +1,118 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.stream.Collectors; + +public class DataErrorFairDistributionTest +{ + @Test + public void testTotalErrorsSmallerThanSampleRowCount() + { + AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); + + Map> dataErrorsByCategory = new HashMap<>(); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUES, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.DATATYPE_CONVERSION, new LinkedList<>()); + List expectedNullValuesErrors = new ArrayList<>(); + List expectedDatatypeErrors = new ArrayList<>(); + + populateDataErrors(ValidationCategory.NULL_VALUES, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.DATATYPE_CONVERSION, 5, 5, dataErrorsByCategory, expectedDatatypeErrors); + + List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, dataErrorsByCategory); + Assertions.assertEquals(10, results.size()); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.NULL_VALUES.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.DATATYPE_CONVERSION.name())).collect(Collectors.toList())); + } + + @Test + public void testExhaustingOneCategory() + { + AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); + + Map> dataErrorsByCategory = new HashMap<>(); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUES, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.DATATYPE_CONVERSION, new LinkedList<>()); + List expectedNullValuesErrors = new ArrayList<>(); + List expectedDatatypeErrors = new ArrayList<>(); + + populateDataErrors(ValidationCategory.NULL_VALUES, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.DATATYPE_CONVERSION, 50, 15, dataErrorsByCategory, expectedDatatypeErrors); + + List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, dataErrorsByCategory); + Assertions.assertEquals(20, results.size()); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.NULL_VALUES.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.DATATYPE_CONVERSION.name())).collect(Collectors.toList())); + } + + @Test + public void testExhaustingBothCategories() + { + AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); + + Map> dataErrorsByCategory = new HashMap<>(); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUES, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.DATATYPE_CONVERSION, new LinkedList<>()); + List expectedNullValuesErrors = new ArrayList<>(); + List expectedDatatypeErrors = new ArrayList<>(); + + populateDataErrors(ValidationCategory.NULL_VALUES, 15, 10, 
dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.DATATYPE_CONVERSION, 20, 9, dataErrorsByCategory, expectedDatatypeErrors); + + List results = sink.getDataErrorsWithFairDistributionAcrossCategories(19, dataErrorsByCategory); + Assertions.assertEquals(19, results.size()); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.NULL_VALUES.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.DATATYPE_CONVERSION.name())).collect(Collectors.toList())); + } + + private void populateDataErrors(ValidationCategory category, int totalCount, int expectedCount, Map> dataErrorsByCategory, List expectedList) + { + int count = 1; + while (count <= totalCount) + { + DataError dataError = getDummyDataError(category, count); + dataErrorsByCategory.get(category).add(dataError); + if (count <= expectedCount) + { + expectedList.add(dataError); + } + count++; + } + } + + private DataError getDummyDataError(ValidationCategory category, long rowNumber) + { + return DataError.builder() + .file("some_file_name") + .errorCategory(category.name()) + .rowNumber(rowNumber) + .columnName("some_column_name") + .rejectedRecord("some_data") + .errorMessage(category.getValidationFailedErrorMessage()) + .build(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java index 7e4c0659ade..771fbd635e4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java @@ -81,4 +81,12 @@ void testWithPartitionFieldsAndOrderByFields() assertEquals("ROW_NUMBER() OVER (PARTITION BY stage.\"field1\",stage.\"field2\" ORDER BY stage.\"field1\" ASC,stage.\"field2\")", sql); } + @Test + void testPartitionAndOrderByBothEmpty() + { + Function rowNumber = new Function(FunctionName.ROW_NUMBER, null, BaseTest.QUOTE_IDENTIFIER); + WindowFunction windowFunction = new WindowFunction(BaseTest.QUOTE_IDENTIFIER, rowNumber, null, null); + String sql = BaseTest.genSql(windowFunction); + assertEquals("ROW_NUMBER() OVER ()", sql); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index b66d1d9d999..cbac4edb965 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -14,7 +14,6 @@ package org.finos.legend.engine.persistence.components.relational.h2; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.Optional; @@ -266,7 +265,6 @@ public List performDryRun(Datasets datasets, Transformer dataErrors = new ArrayList<>(); Map> dataErrorsByCategory = new HashMap<>(); for (ValidationCategory validationCategory : ValidationCategory.values()) { @@ -292,7 +290,6 @@ public List performDryRun(Datasets datasets, Transformer performDryRun(Datasets datasets, Transformer row : resultSets) { DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, DATATYPE_CONVERSION, validatedColumn.fieldName()); - dataErrors.add(dataError); dataErrorsByCategory.get(DATATYPE_CONVERSION).add(dataError); } } @@ -347,13 +343,6 @@ public List performDryRun(Datasets datasets, Transformer performDryRunWithValidationQueries(Datasets datasets, Ex { executor.executePhysicalPlan(dryRunSqlPlan); - List dataErrors = new ArrayList<>(); Map> dataErrorsByCategory = new HashMap<>(); for (ValidationCategory validationCategory : ValidationCategory.values()) { @@ -329,7 +328,6 @@ private List performDryRunWithValidationQueries(Datasets datasets, Ex if (row.get(column) == null) { DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, validationCategory, column); - dataErrors.add(dataError); dataErrorsByCategory.get(validationCategory).add(dataError); } } @@ -338,14 +336,7 @@ private List performDryRunWithValidationQueries(Datasets datasets, Ex } } - if (dataErrors.size() <= sampleRowCount) - { - return dataErrors; - } - else - { - return getDataErrorsWithEqualDistributionAcrossCategories(sampleRowCount, dataErrorsByCategory); - } + return getDataErrorsWithFairDistributionAcrossCategories(sampleRowCount, dataErrorsByCategory); } @Override From 3a5f6263fa9f1aa182d0f62d388d27536cd34ac0 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 4 Mar 2024 15:59:57 +0800 Subject: [PATCH 19/32] Fix snowflake datatype check - to check column by column and other fixes --- .../pom.xml | 10 ++ .../values/CastFunctionAbstract.java | 38 -------- .../components/planner/BulkLoadPlanner.java | 94 +++++-------------- .../components/planner/Planner.java | 3 +- .../components/util/Capability.java | 3 +- .../components/util/ValidationCategory.java | 14 +-- .../pom.xml | 6 ++ .../relational/ansi/AnsiSqlSink.java | 26 +++-- .../util/DataErrorFairDistributionTest.java | 46 ++++----- .../components/relational/RelationalSink.java | 3 +- .../api/GeneratorResultAbstract.java | 16 ++-- .../api/RelationalGeneratorAbstract.java | 15 +-- .../pom.xml | 7 ++ .../components/relational/h2/H2Sink.java | 56 +++++------ ...sitor.java => TryCastFunctionVisitor.java} | 12 +-- .../ingestmode/bulkload/BulkLoadTest.java | 60 ++++++------ .../pom.xml | 7 ++ .../relational/snowflake/SnowflakeSink.java | 16 ++-- .../values/MetadataRowNumberColumn.java | 1 + .../components/ingestmode/BulkLoadTest.java | 33 +++---- 20 files changed, 215 insertions(+), 251 deletions(-) delete mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/{CastFunctionVisitor.java => TryCastFunctionVisitor.java} (73%) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml index 13ef09997d8..e32a9fc26dd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml @@ -42,5 +42,15 @@ jackson-databind + + <dependency> + <groupId>org.eclipse.collections</groupId> + <artifactId>eclipse-collections-api</artifactId> + </dependency> + <dependency> + <groupId>org.eclipse.collections</groupId> + <artifactId>eclipse-collections</artifactId> + </dependency> + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java deleted file mode 100644 index 9392c993c96..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/CastFunctionAbstract.java +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2024 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
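The eclipse-collections dependency added to the pom above supplies the Pair and Tuples types that BulkLoadPlanner now uses to key each validation query by the set of columns it checks. A minimal sketch of that API (the example values are hypothetical):

import org.eclipse.collections.api.tuple.Pair;
import org.eclipse.collections.impl.tuple.Tuples;

public class PairSketch
{
    public static void main(String[] args)
    {
        // Tuples.pair is the factory used to build (validated columns, query)
        // pairs per validation category in the planner
        Pair<String, Integer> columnToQuery = Tuples.pair("COL_INT", 1);
        System.out.println(columnToQuery.getOne() + " -> " + columnToQuery.getTwo());
    }
}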
- -package org.finos.legend.engine.persistence.components.logicalplan.values; - -import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; - -import static org.immutables.value.Value.Immutable; -import static org.immutables.value.Value.Parameter; -import static org.immutables.value.Value.Style; - -@Immutable -@Style( - typeAbstract = "*Abstract", - typeImmutable = "*", - jdkOnly = true, - optionalAcceptNullable = true, - strictBuilder = true -) -public interface CastFunctionAbstract extends Value -{ - @Parameter(order = 0) - Value field(); - - @Parameter(order = 1) - FieldType type(); -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 0305ee41f80..b605d14f487 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.planner; +import org.eclipse.collections.api.tuple.Pair; +import org.eclipse.collections.impl.tuple.Tuples; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; @@ -36,7 +38,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.CastFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.logicalplan.values.All; @@ -163,7 +164,6 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) } List operations = new ArrayList<>(); - operations.add(Delete.builder().dataset(validationDataset).build()); if (stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) { @@ -177,6 +177,8 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) } else { + operations.add(Delete.builder().dataset(validationDataset).build()); + List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValuesWithVarCharType(stagingDataset()); fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); fieldsToSelect.add(MetadataRowNumberField.builder().build()); @@ -206,22 +208,7 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) NOT APPLICABLE ------------------ - Generic Approach with TRY_CAST Logic: - ------------------ - For null values: - SELECT * FROM temp_table WHERE - 
(non_nullable_data_column_1 = NULL - OR non_nullable_data_column_2 = NULL - OR ...) - - For datatype conversion: - SELECT * FROM temp_table WHERE - ((non_string_data_column_1 != NULL AND TRY_CAST(non_string_data_column_1 AS datatype) = NULL) - OR (non_string_data_column_2 != NULL AND TRY_CAST(non_string_data_column_2 AS datatype) = NULL) - OR ...) - - ------------------ - Generic Approach with CAST Logic: + Generic Approach Logic: ------------------ For null values: SELECT * FROM temp_table WHERE @@ -230,18 +217,17 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) OR ...) For datatype conversion: - SELECT * FROM temp_table WHERE (non_string_data_column_1 != NULL AND CAST(non_string_data_column_1 AS datatype) = NULL) - SELECT * FROM temp_table WHERE (non_string_data_column_2 != NULL AND CAST(non_string_data_column_2 AS datatype) = NULL) + SELECT * FROM temp_table WHERE (non_string_data_column_1 != NULL AND TRY_CAST(non_string_data_column_1 AS datatype) = NULL) + SELECT * FROM temp_table WHERE (non_string_data_column_2 != NULL AND TRY_CAST(non_string_data_column_2 AS datatype) = NULL) ... */ - @Override - public Map, LogicalPlan>> buildLogicalPlanForDryRunValidation(Resources resources) + public Map, LogicalPlan>>> buildLogicalPlanForDryRunValidation(Resources resources) { if (!capabilities.contains(Capability.DRY_RUN) || stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported()) { return Collections.emptyMap(); } - Map, LogicalPlan>> validationMap = new HashMap<>(); + Map, LogicalPlan>>> validationMap = new HashMap<>(); List fieldsToCheckForNull = stagingDataset().schema().fields().stream().filter(field -> !field.nullable()).collect(Collectors.toList()); List fieldsToCheckForDatatype = stagingDataset().schema().fields().stream().filter(field -> !DataType.isStringDatatype(field.type().dataType())).collect(Collectors.toList()); @@ -255,62 +241,34 @@ public Map, LogicalPlan>> buildLogicalPl .limit(options().sampleRowCount()) .build(); - validationMap.put(ValidationCategory.NULL_VALUES, - Collections.singletonMap(fieldsToCheckForNull.stream().map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), - LogicalPlan.of(Collections.singletonList(queryForNull)))); + validationMap.put(ValidationCategory.CHECK_CONSTRAINT, + Collections.singletonList(Tuples.pair(fieldsToCheckForNull.stream().map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), + LogicalPlan.of(Collections.singletonList(queryForNull))))); } if (!fieldsToCheckForDatatype.isEmpty()) { - if (capabilities.contains(Capability.TRY_CAST)) - { - Selection queryForDatatype = getSelectColumnsWithTryCast(validationDataset, fieldsToCheckForDatatype); - validationMap.put(ValidationCategory.DATATYPE_CONVERSION, - Collections.singletonMap(fieldsToCheckForDatatype.stream().map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), - LogicalPlan.of(Collections.singletonList(queryForDatatype)))); - } - else + validationMap.put(ValidationCategory.CONVERSION, new ArrayList<>()); + + for (Field fieldToCheckForDatatype : fieldsToCheckForDatatype) { - validationMap.put(ValidationCategory.DATATYPE_CONVERSION, new HashMap<>()); - for (Field fieldToCheckForDatatype : fieldsToCheckForDatatype) - { - Selection queryForDatatype = 
getSelectColumnsWithCast(validationDataset, fieldToCheckForDatatype); - validationMap.get(ValidationCategory.DATATYPE_CONVERSION).put(Stream.of(fieldToCheckForDatatype).map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), - LogicalPlan.of(Collections.singletonList(queryForDatatype))); - } + Selection queryForDatatype = Selection.builder() + .source(validationDataset) + .condition(And.builder() + .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(validationDataset.datasetReference()).build()))) + .addConditions(IsNull.of(TryCastFunction.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(validationDataset.datasetReference()).build(), fieldToCheckForDatatype.type()))) + .build()) + .limit(options().sampleRowCount()) + .build(); + + validationMap.get(ValidationCategory.CONVERSION).add(Tuples.pair(Stream.of(fieldToCheckForDatatype).map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), + LogicalPlan.of(Collections.singletonList(queryForDatatype)))); } } return validationMap; } - private Selection getSelectColumnsWithTryCast(Dataset dataset, List fieldsToCheckForDatatype) - { - // When using TRY_CAST, we can check all columns at once, as the query will not fail - return Selection.builder() - .source(dataset) - .condition(Or.of(fieldsToCheckForDatatype.stream().map(field -> And.builder() - .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build()))) - .addConditions(IsNull.of(TryCastFunction.of(FieldValue.builder().fieldName(field.name()).datasetRef(dataset.datasetReference()).build(), field.type()))) - .build()) - .collect(Collectors.toList()))) - .limit(options().sampleRowCount()) - .build(); - } - - private Selection getSelectColumnsWithCast(Dataset dataset, Field fieldToCheckForDatatype) - { - // When using CAST, we have to check column by column as the query may fail and we need to know which column we have a problem in - return Selection.builder() - .source(dataset) - .condition(And.builder() - .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build()))) - .addConditions(IsNull.of(CastFunction.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(dataset.datasetReference()).build(), fieldToCheckForDatatype.type()))) - .build()) - .limit(options().sampleRowCount()) - .build(); - } - private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) { List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 4c2f2b56003..fd6878c393f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -18,6 +18,7 @@ import java.util.function.Consumer; import java.util.stream.Collectors; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.Resources; @@ -277,7 +278,7 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) return LogicalPlan.of(Collections.emptyList()); } - public Map<ValidationCategory, Map<Set<FieldValue>, LogicalPlan>> buildLogicalPlanForDryRunValidation(Resources resources) + public Map<ValidationCategory, List<Pair<Set<FieldValue>, LogicalPlan>>> buildLogicalPlanForDryRunValidation(Resources resources) { return Collections.emptyMap(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java index a40ef45b345..438dd9219ac 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java @@ -23,6 +23,5 @@ public enum Capability DATA_TYPE_LENGTH_CHANGE, DATA_TYPE_SCALE_CHANGE, TRANSFORM_WHILE_COPY, - DRY_RUN, - TRY_CAST + DRY_RUN } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java index 479808aba5f..d87d6c44832 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java @@ -16,18 +16,18 @@ public enum ValidationCategory { - NULL_VALUES("Null values found in non-nullable column"), - DATATYPE_CONVERSION("Unable to type cast column"); + CHECK_CONSTRAINT("check_constraint"), + CONVERSION("conversion"); - private final String validationFailedErrorMessage; + private final String categoryName; - ValidationCategory(String validationFailedErrorMessage) + ValidationCategory(String categoryName) { - this.validationFailedErrorMessage = validationFailedErrorMessage; + this.categoryName = categoryName; } - public String getValidationFailedErrorMessage() + public String getCategoryName() { - return this.validationFailedErrorMessage; + return this.categoryName; } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml index 7bc2908c7fc..fb91177c734 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml @@ -51,6 +51,12 @@ com.fasterxml.jackson.core jackson-databind + + <dependency> + <groupId>org.eclipse.collections</groupId> + <artifactId>eclipse-collections-api</artifactId> + </dependency> + org.finos.legend.engine diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 4b0176aad56..15cd1e209dc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -330,7 +330,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor - public List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, Map<Set<FieldValue>, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + public List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount) { throw new UnsupportedOperationException("DryRun not supported!"); } @@ -352,21 +352,33 @@ protected Optional<Long> getLong(Map<String, Object> row, String key) protected DataError constructDataError(List<String> allColumns, Map<String, Object> row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) { return DataError.builder() - .errorMessage(validationCategory.getValidationFailedErrorMessage()) + .errorMessage(getValidationFailedErrorMessage(validationCategory)) .file(getString(row, fileNameColumnName).orElseThrow(IllegalStateException::new)) - .errorCategory(validationCategory.name()) + .errorCategory(validationCategory.getCategoryName()) .columnName(validatedColumnName) .rowNumber(getLong(row, rowNumberColumnName)) .rejectedRecord(allColumns.stream().map(column -> getString(row, column).orElse("")).collect(Collectors.joining(","))) .build(); } - public List<DataError> getDataErrorsWithFairDistributionAcrossCategories(int sampleRowCount, Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory) + private String getValidationFailedErrorMessage(ValidationCategory category) { - List<DataError> totalErrors = dataErrorsByCategory.values().stream().flatMap(Collection::stream).collect(Collectors.toList()); - if (totalErrors.size() <= sampleRowCount) + switch (category) { - return totalErrors; + case CHECK_CONSTRAINT: + return "Null values found in non-nullable column"; + case CONVERSION: + return "Unable to type cast column"; + default: + throw new 
IllegalStateException("Unsupported validation category"); + } + } + + public List getDataErrorsWithFairDistributionAcrossCategories(int sampleRowCount, int dataErrorsTotalCount, Map> dataErrorsByCategory) + { + if (dataErrorsTotalCount <= sampleRowCount) + { + return dataErrorsByCategory.values().stream().flatMap(Collection::stream).collect(Collectors.toList()); } List fairlyDistributedDataErrors = new ArrayList<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java index 3ca9ec1289a..68430f504c6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -35,18 +35,18 @@ public void testTotalErrorsSmallerThanSampleRowCount() AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); Map> dataErrorsByCategory = new HashMap<>(); - dataErrorsByCategory.put(ValidationCategory.NULL_VALUES, new LinkedList<>()); - dataErrorsByCategory.put(ValidationCategory.DATATYPE_CONVERSION, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.CHECK_CONSTRAINT, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.CONVERSION, new LinkedList<>()); List expectedNullValuesErrors = new ArrayList<>(); List expectedDatatypeErrors = new ArrayList<>(); - populateDataErrors(ValidationCategory.NULL_VALUES, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); - populateDataErrors(ValidationCategory.DATATYPE_CONVERSION, 5, 5, dataErrorsByCategory, expectedDatatypeErrors); + populateDataErrors(ValidationCategory.CHECK_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.CONVERSION, 5, 5, dataErrorsByCategory, expectedDatatypeErrors); - List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, dataErrorsByCategory); + List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 10, dataErrorsByCategory); Assertions.assertEquals(10, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.NULL_VALUES.name())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.DATATYPE_CONVERSION.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CHECK_CONSTRAINT.getCategoryName())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CONVERSION.getCategoryName())).collect(Collectors.toList())); } @Test @@ -55,18 +55,18 @@ public void testExhaustingOneCategory() AnsiSqlSink sink = (AnsiSqlSink) 
AnsiSqlSink.get(); Map> dataErrorsByCategory = new HashMap<>(); - dataErrorsByCategory.put(ValidationCategory.NULL_VALUES, new LinkedList<>()); - dataErrorsByCategory.put(ValidationCategory.DATATYPE_CONVERSION, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.CHECK_CONSTRAINT, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.CONVERSION, new LinkedList<>()); List expectedNullValuesErrors = new ArrayList<>(); List expectedDatatypeErrors = new ArrayList<>(); - populateDataErrors(ValidationCategory.NULL_VALUES, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); - populateDataErrors(ValidationCategory.DATATYPE_CONVERSION, 50, 15, dataErrorsByCategory, expectedDatatypeErrors); + populateDataErrors(ValidationCategory.CHECK_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.CONVERSION, 50, 15, dataErrorsByCategory, expectedDatatypeErrors); - List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, dataErrorsByCategory); + List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 55, dataErrorsByCategory); Assertions.assertEquals(20, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.NULL_VALUES.name())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.DATATYPE_CONVERSION.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CHECK_CONSTRAINT.getCategoryName())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CONVERSION.getCategoryName())).collect(Collectors.toList())); } @Test @@ -75,18 +75,18 @@ public void testExhaustingBothCategories() AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); Map> dataErrorsByCategory = new HashMap<>(); - dataErrorsByCategory.put(ValidationCategory.NULL_VALUES, new LinkedList<>()); - dataErrorsByCategory.put(ValidationCategory.DATATYPE_CONVERSION, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.CHECK_CONSTRAINT, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.CONVERSION, new LinkedList<>()); List expectedNullValuesErrors = new ArrayList<>(); List expectedDatatypeErrors = new ArrayList<>(); - populateDataErrors(ValidationCategory.NULL_VALUES, 15, 10, dataErrorsByCategory, expectedNullValuesErrors); - populateDataErrors(ValidationCategory.DATATYPE_CONVERSION, 20, 9, dataErrorsByCategory, expectedDatatypeErrors); + populateDataErrors(ValidationCategory.CHECK_CONSTRAINT, 15, 10, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.CONVERSION, 20, 9, dataErrorsByCategory, expectedDatatypeErrors); - List results = sink.getDataErrorsWithFairDistributionAcrossCategories(19, dataErrorsByCategory); + List results = sink.getDataErrorsWithFairDistributionAcrossCategories(19, 35, dataErrorsByCategory); Assertions.assertEquals(19, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.NULL_VALUES.name())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> 
error.errorCategory().equals(ValidationCategory.DATATYPE_CONVERSION.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CHECK_CONSTRAINT.getCategoryName())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CONVERSION.getCategoryName())).collect(Collectors.toList())); } private void populateDataErrors(ValidationCategory category, int totalCount, int expectedCount, Map> dataErrorsByCategory, List expectedList) @@ -108,11 +108,11 @@ private DataError getDummyDataError(ValidationCategory category, long rowNumber) { return DataError.builder() .file("some_file_name") - .errorCategory(category.name()) + .errorCategory(category.getCategoryName()) .rowNumber(rowNumber) .columnName("some_column_name") .rejectedRecord("some_data") - .errorMessage(category.getValidationFailedErrorMessage()) + .errorMessage("some_error_message") .build(); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index 4e9b4f8cbeb..2b4723eab4f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.relational; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; @@ -194,5 +195,5 @@ public interface ConstructDatasetFromDatabase public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues); - public abstract List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount); + public abstract List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, org.finos.legend.engine.persistence.components.relational.SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 7a22f30d3c3..30e37aa4c06 100644 --- 
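The hunk further above shows only the early return of getDataErrorsWithFairDistributionAcrossCategories; the remainder of the method is context not included in this patch. A sketch consistent with the three tests above, which expect round-robin draining of the per-category buckets until the sample cap is hit (Queue-backed buckets are assumed here, as the LinkedList usage in the tests suggests; java.util and java.util.stream.Collectors imports assumed):

    // Draw errors category by category so a noisy category cannot crowd the
    // others out of the capped sample. Passing the precomputed total avoids
    // flattening every queue just to learn the cap was never reached.
    public List<DataError> getDataErrorsWithFairDistributionAcrossCategories(int sampleRowCount, int dataErrorsTotalCount, Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory)
    {
        if (dataErrorsTotalCount <= sampleRowCount)
        {
            return dataErrorsByCategory.values().stream().flatMap(Collection::stream).collect(Collectors.toList());
        }
        List<DataError> fairlyDistributedDataErrors = new ArrayList<>();
        boolean drained = false;
        while (fairlyDistributedDataErrors.size() < sampleRowCount && !drained)
        {
            drained = true;
            for (Queue<DataError> queue : dataErrorsByCategory.values())
            {
                DataError error = queue.poll();
                if (error != null && fairlyDistributedDataErrors.size() < sampleRowCount)
                {
                    fairlyDistributedDataErrors.add(error);
                    drained = false;
                }
            }
        }
        return fairlyDistributedDataErrors;
    }

This reproduces the tested behavior: 10 of 10 errors survive a cap of 20, a 5-error category is exhausted while the 50-error category fills the remaining 15 slots of a 20 cap, and a cap of 19 splits 10/9 across two categories.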
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.relational.api; +import org.eclipse.collections.api.tuple.Pair; +import org.eclipse.collections.impl.tuple.Tuples; import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; @@ -60,7 +62,7 @@ public abstract class GeneratorResultAbstract public abstract SqlPlan dryRunSqlPlan(); - public abstract Map, SqlPlan>> dryRunValidationSqlPlan(); + public abstract Map, SqlPlan>>> dryRunValidationSqlPlan(); public abstract Optional ingestDataSplitRange(); @@ -120,15 +122,15 @@ public List dryRunSql() return dryRunSqlPlan().getSqlList(); } - public Map, String>> dryRunValidationSql() + public Map, String>>> dryRunValidationSql() { return dryRunValidationSqlPlan().keySet().stream() .collect(Collectors.toMap( - k -> k, - k -> dryRunValidationSqlPlan().get(k).keySet().stream().collect(Collectors.toMap( - k2 -> k2, - k2 -> dryRunValidationSqlPlan().get(k).get(k2).getSql() - )))); + k -> k, + k -> dryRunValidationSqlPlan().get(k).stream().map( + e -> Tuples.pair(e.getOne(), e.getTwo().getSql()) + ).collect(Collectors.toList()) + )); } public List metadataIngestSql() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index c623ff8f00e..7b04d1c29ec 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.relational.api; +import org.eclipse.collections.api.tuple.Pair; +import org.eclipse.collections.impl.tuple.Tuples; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.Resources; @@ -43,6 +45,7 @@ import org.immutables.value.Value.Style; import java.time.Clock; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -297,15 +300,15 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann SqlPlan dryRunSqlPlan = 
transformer.generatePhysicalPlan(dryRunLogicalPlan); // dry-run validations - Map, LogicalPlan>> dryRunValidationLogicalPlan = planner.buildLogicalPlanForDryRunValidation(resources); - Map, SqlPlan>> dryRunValidationSqlPlan = new HashMap<>(); + Map, LogicalPlan>>> dryRunValidationLogicalPlan = planner.buildLogicalPlanForDryRunValidation(resources); + Map, SqlPlan>>> dryRunValidationSqlPlan = new HashMap<>(); for (ValidationCategory validationCategory : dryRunValidationLogicalPlan.keySet()) { - dryRunValidationSqlPlan.put(validationCategory, new HashMap<>()); - for (Set columns : dryRunValidationLogicalPlan.get(validationCategory).keySet()) + dryRunValidationSqlPlan.put(validationCategory, new ArrayList<>()); + for (Pair, LogicalPlan> pair : dryRunValidationLogicalPlan.get(validationCategory)) { - SqlPlan sqlplan = transformer.generatePhysicalPlan(dryRunValidationLogicalPlan.get(validationCategory).get(columns)); - dryRunValidationSqlPlan.get(validationCategory).put(columns, sqlplan); + SqlPlan sqlplan = transformer.generatePhysicalPlan(pair.getTwo()); + dryRunValidationSqlPlan.get(validationCategory).add(Tuples.pair(pair.getOne(), sqlplan)); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml index 9509f067281..1a64c34e31f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml @@ -50,6 +50,13 @@ value + + + org.eclipse.collections + eclipse-collections-api + + + com.h2database diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index cbac4edb965..50a8688dd00 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -14,10 +14,12 @@ package org.finos.legend.engine.persistence.components.relational.h2; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.Optional; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; @@ -31,7 +33,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.LoadCsv; -import 
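Why the loop above moves from a Map keyed by Set<FieldValue> to a List of Pairs: a HashMap keyed by a Set iterates in hash order, which is why a later hunk in this same patch deletes the key-sorting workaround in H2Sink.performDryRun. An ordered list of pairs preserves generation order end to end, so the tests can assert each validation SQL by index. A tiny self-contained illustration, with String stand-ins for the plan payload:

    import org.eclipse.collections.api.tuple.Pair;
    import org.eclipse.collections.impl.tuple.Tuples;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Set;

    List<Pair<Set<String>, String>> plans = new ArrayList<>();
    plans.add(Tuples.pair(Collections.singleton("col_int"), "validation query for col_int"));
    plans.add(Tuples.pair(Collections.singleton("col_string"), "validation query for col_string"));
    // Iteration follows insertion order, unlike a HashMap keyed by Set:
    plans.forEach(p -> System.out.println(p.getTwo()));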
org.finos.legend.engine.persistence.components.logicalplan.values.CastFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.HashFunction; @@ -39,6 +40,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -67,7 +69,7 @@ import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesSelectionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ToArrayFunctionVisitor; -import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.CastFunctionVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.TryCastFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.Transformer; @@ -95,8 +97,8 @@ import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; -import static org.finos.legend.engine.persistence.components.util.ValidationCategory.DATATYPE_CONVERSION; -import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUES; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CONVERSION; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CHECK_CONSTRAINT; public class H2Sink extends AnsiSqlSink { @@ -137,7 +139,7 @@ public class H2Sink extends AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); logicalPlanVisitorByClass.put(ToArrayFunction.class, new ToArrayFunctionVisitor()); - logicalPlanVisitorByClass.put(CastFunction.class, new CastFunctionVisitor()); + logicalPlanVisitorByClass.put(TryCastFunction.class, new TryCastFunctionVisitor()); logicalPlanVisitorByClass.put(MetadataFileNameField.class, new MetadataFileNameFieldVisitor()); logicalPlanVisitorByClass.put(MetadataRowNumberField.class, new MetadataRowNumberFieldVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); @@ -261,10 +263,11 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets datasets, Transformer 
transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount) { executor.executePhysicalPlan(dryRunSqlPlan); + int dataErrorsTotalCount = 0; Map> dataErrorsByCategory = new HashMap<>(); for (ValidationCategory validationCategory : ValidationCategory.values()) { @@ -273,24 +276,25 @@ public List performDryRun(Datasets datasets, Transformer allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); - Map, SqlPlan> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUES, new HashMap<>()); - Map, SqlPlan> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(DATATYPE_CONVERSION, new HashMap<>()); + List, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(CHECK_CONSTRAINT, new ArrayList<>()); + List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(CONVERSION, new ArrayList<>()); // Execute queries for null values - for (Set validatedColumns : queriesForNull.keySet()) + for (Pair, SqlPlan> pair : queriesForNull) { - List results = executor.executePhysicalPlanAndGetResults(queriesForNull.get(validatedColumns)); + List results = executor.executePhysicalPlanAndGetResults(pair.getTwo()); if (!results.isEmpty()) { List> resultSets = results.get(0).getData(); for (Map row : resultSets) { - for (String column : validatedColumns.stream().map(FieldValue::fieldName).collect(Collectors.toSet())) + for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet())) { if (row.get(column) == null) { - DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, NULL_VALUES, column); - dataErrorsByCategory.get(NULL_VALUES).add(dataError); + DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, CHECK_CONSTRAINT, column); + dataErrorsByCategory.get(CHECK_CONSTRAINT).add(dataError); + dataErrorsTotalCount++; } } } @@ -298,24 +302,11 @@ public List performDryRun(Datasets datasets, Transformer> sortedMapKeysForDatatype = queriesForDatatype.keySet().stream().sorted((o1, o2) -> - { - // There is only one element in the set - Optional fieldValue1 = o1.stream().findAny(); - Optional fieldValue2 = o2.stream().findAny(); - if (fieldValue1.isPresent() && fieldValue2.isPresent()) - { - return fieldValue1.get().fieldName().compareTo(fieldValue2.get().fieldName()); - } - return 0; - }).collect(Collectors.toList()); - - for (Set validatedColumns : sortedMapKeysForDatatype) + for (Pair, SqlPlan> pair : queriesForDatatype) { try { - executor.executePhysicalPlanAndGetResults(queriesForDatatype.get(validatedColumns)); + executor.executePhysicalPlanAndGetResults(pair.getTwo()); } catch (RuntimeException e) { @@ -326,7 +317,7 @@ public List performDryRun(Datasets datasets, Transformer results = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(LogicalPlanFactory.getLogicalPlanForSelectAllFieldsWithStringFieldEquals(validatedColumn, problematicValue)), sampleRowCount); if (!results.isEmpty()) @@ -334,8 +325,9 @@ public List performDryRun(Datasets datasets, Transformer> resultSets = results.get(0).getData(); for (Map row : resultSets) { - DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, DATATYPE_CONVERSION, validatedColumn.fieldName()); - dataErrorsByCategory.get(DATATYPE_CONVERSION).add(dataError); + DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, CONVERSION, validatedColumn.fieldName()); + 
dataErrorsByCategory.get(CONVERSION).add(dataError); + dataErrorsTotalCount++; } } } @@ -343,6 +335,6 @@ public List performDryRun(Datasets datasets, Transformer +public class TryCastFunctionVisitor implements LogicalPlanVisitor { @Override - public VisitorResult visit(PhysicalPlanNode prev, CastFunction current, VisitorContext context) + public VisitorResult visit(PhysicalPlanNode prev, TryCastFunction current, VisitorContext context) { DataType dataType = new H2DataTypeMapping().getDataType(current.type()); - org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction castFunction - = new org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction(dataType, context.quoteIdentifier()); + CastFunction castFunction = new CastFunction(dataType, context.quoteIdentifier()); for (Optimizer optimizer : context.optimizers()) { - castFunction = (org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction) optimizer.optimize(castFunction); + castFunction = (CastFunction) optimizer.optimize(castFunction); } prev.push(castFunction); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 69e0385ef38..95ffd309cab 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -823,7 +823,7 @@ public void testBulkLoadDryRunSuccess() String expectedDryRunDatatypeValidationSql1 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + - "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + @@ -831,18 +831,18 @@ public void testBulkLoadDryRunSuccess() String expectedDryRunDatatypeValidationSql3 = "SELECT 
main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + - "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); - Assertions.assertNull(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql1)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql2)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql3)); - Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertNull(operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT)); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).size()); Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); @@ -931,7 +931,7 @@ public void testBulkLoadDryRunFailure() String expectedDryRunDatatypeValidationSql1 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + - "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as 
main_validation_lp_yosulf " + @@ -939,19 +939,19 @@ public void testBulkLoadDryRunFailure() String expectedDryRunDatatypeValidationSql3 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"FILE\",main_validation_lp_yosulf.\"ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + - "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).containsValue(expectedDryRunNullValidationSql)); - Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).keySet().size()); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql1)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql2)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql3)); - Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).size()); Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); @@ -965,35 +965,35 @@ public void testBulkLoadDryRunFailure() List expectedErrorRecords = Arrays.asList(DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.NULL_VALUES.name()) + .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) .rowNumber(1L) .columnName(col3NonNullable.name()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.NULL_VALUES.name()) + .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) .rowNumber(2L) 
.columnName(col2NonNullable.name()) .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) .rowNumber(1L) .columnName(col1.name()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") .build(), DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) .rowNumber(1L) .columnName(col4.name()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") .build(), DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) .rowNumber(2L) .columnName(col3.name()) .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") @@ -1093,12 +1093,12 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).containsValue(expectedDryRunNullValidationSql)); - Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).keySet().size()); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql1)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql2)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql3)); - Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).size()); Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); @@ -1112,23 +1112,23 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() List expectedErrorRecords = Arrays.asList(DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.NULL_VALUES.name()) + .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) .rowNumber(1L) .columnName(col3NonNullable.name().toUpperCase()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") 
.errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.NULL_VALUES.name()) + .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) .rowNumber(2L) .columnName(col2NonNullable.name().toUpperCase()) .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) - .errorCategory(ValidationCategory.DATATYPE_CONVERSION.name()) + .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) .rowNumber(1L) - .columnName(col4.name().toUpperCase()) + .columnName(col1.name().toUpperCase()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") .build()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml index 4a5f82ca3e6..667c7bcd5d4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml @@ -61,6 +61,13 @@ + + + org.eclipse.collections + eclipse-collections-api + + + org.slf4j diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 061832902a4..087bb80f32e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -16,6 +16,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; @@ -144,7 +145,6 @@ public class SnowflakeSink extends AnsiSqlSink capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.TRANSFORM_WHILE_COPY); capabilities.add(Capability.DRY_RUN); - capabilities.add(Capability.TRY_CAST); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); @@ -262,7 +262,7 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } } - public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets 
datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount) { if (dryRunValidationSqlPlan == null || dryRunValidationSqlPlan.isEmpty()) { @@ -301,10 +301,11 @@ private List performDryRunWithValidationMode(Executor performDryRunWithValidationQueries(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>> dryRunValidationSqlPlan, int sampleRowCount) + private List performDryRunWithValidationQueries(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount) { executor.executePhysicalPlan(dryRunSqlPlan); + int dataErrorsTotalCount = 0; Map> dataErrorsByCategory = new HashMap<>(); for (ValidationCategory validationCategory : ValidationCategory.values()) { @@ -315,20 +316,21 @@ private List performDryRunWithValidationQueries(Datasets datasets, Ex for (ValidationCategory validationCategory : dryRunValidationSqlPlan.keySet()) { - for (Set validatedColumns : dryRunValidationSqlPlan.get(validationCategory).keySet()) + for (Pair, SqlPlan> pair : dryRunValidationSqlPlan.get(validationCategory)) { - List results = executor.executePhysicalPlanAndGetResults(dryRunValidationSqlPlan.get(validationCategory).get(validatedColumns)); + List results = executor.executePhysicalPlanAndGetResults(pair.getTwo()); if (!results.isEmpty()) { List> resultSets = results.get(0).getData(); for (Map row : resultSets) { - for (String column : validatedColumns.stream().map(FieldValue::fieldName).collect(Collectors.toSet())) + for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet())) { if (row.get(column) == null) { DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, validationCategory, column); dataErrorsByCategory.get(validationCategory).add(dataError); + dataErrorsTotalCount++; } } } @@ -336,7 +338,7 @@ private List performDryRunWithValidationQueries(Datasets datasets, Ex } } - return getDataErrorsWithFairDistributionAcrossCategories(sampleRowCount, dataErrorsByCategory); + return getDataErrorsWithFairDistributionAcrossCategories(sampleRowCount, dataErrorsTotalCount, dataErrorsByCategory); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java index dedb1a1e63f..ae108a68adc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberColumn.java @@ -36,5 +36,6 @@ public void genSql(StringBuilder builder) throws SqlDomException public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException { 
builder.append("METADATA$FILE_ROW_NUMBER"); + builder.append(" + 1"); // This is to standardize such that row numbers start from 1 } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index c98dddee370..c5c876ad8f0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.common.StatisticName; @@ -138,7 +139,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); List dryRunSql = operations.dryRunSql(); - Map, String>> dryRunValidationSql = operations.dryRunValidationSql(); + Map, String>>> dryRunValidationSql = operations.dryRunValidationSql(); List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); List dryRunPostCleanupSql = operations.dryRunPostCleanupSql(); @@ -158,7 +159,6 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() String expectedDryRunPreActionsSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; - String expectedDryRunDeleteSql = "DELETE FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf"; String expectedDryRunLoadSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + @@ -170,8 +170,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); - Assertions.assertEquals(expectedDryRunDeleteSql, dryRunSql.get(0)); - Assertions.assertEquals(expectedDryRunLoadSql, dryRunSql.get(1)); + Assertions.assertEquals(expectedDryRunLoadSql, dryRunSql.get(0)); Assertions.assertTrue(dryRunValidationSql.isEmpty()); Assertions.assertEquals(expectedDryRunPostCleanupSql, dryRunPostCleanupSql.get(0)); @@ -255,27 +254,31 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() String expectedDryRunDeleteSql = "DELETE FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf"; String expectedDryRunLoadSQl = "COPY INTO 
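On the METADATA$FILE_ROW_NUMBER change above: the generated Snowflake COPY projection now emits an arithmetic expression rather than a bare metadata column, and the expected SQL in the tests below changes accordingly. Per the in-code comment, the intent is that DataError.rowNumber is 1-based regardless of sink. Condensed view of the patched SqlDom value, with an assumed caller shown as a comment:

    // Renders the row-number value inside the COPY INTO projection.
    public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException
    {
        builder.append("METADATA$FILE_ROW_NUMBER");
        builder.append(" + 1"); // standardize so reported row numbers start from 1
    }

    // Feeding a fresh StringBuilder through genSqlWithoutAlias yields
    // "METADATA$FILE_ROW_NUMBER + 1", matching the expectations below.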
\"my_db\".\"my_name_validation_lp_yosulf\" (\"col_bigint\", \"col_variant\", \"FILE\", \"ROW_NUMBER\") " + - "FROM (SELECT t.$4 as \"col_bigint\",t.$5 as \"col_variant\",METADATA$FILENAME,METADATA$FILE_ROW_NUMBER FROM my_location as t) " + + "FROM (SELECT t.$4 as \"col_bigint\",t.$5 as \"col_variant\",METADATA$FILENAME,METADATA$FILE_ROW_NUMBER + 1 FROM my_location as t) " + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') FILE_FORMAT = (TYPE = 'AVRO') ON_ERROR = 'ABORT_STATEMENT'"; String expectedDryRunNullValidationSql = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"FILE\",my_name_validation_lp_yosulf.\"ROW_NUMBER\" " + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf " + "WHERE (my_name_validation_lp_yosulf.\"col_bigint\" IS NULL) OR (my_name_validation_lp_yosulf.\"col_variant\" IS NULL) LIMIT 20"; - String expectedDryRunDatatypeValidationSql = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"FILE\",my_name_validation_lp_yosulf.\"ROW_NUMBER\" " + - "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf WHERE " + - "((NOT (my_name_validation_lp_yosulf.\"col_bigint\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_bigint\" AS BIGINT) IS NULL)) " + - "OR ((NOT (my_name_validation_lp_yosulf.\"col_variant\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_variant\" AS VARIANT) IS NULL)) LIMIT 20"; + String expectedDryRunDatatypeValidationSql1 = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"FILE\",my_name_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf " + + "WHERE (NOT (my_name_validation_lp_yosulf.\"col_bigint\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_bigint\" AS BIGINT) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"FILE\",my_name_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf " + + "WHERE (NOT (my_name_validation_lp_yosulf.\"col_variant\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_variant\" AS VARIANT) IS NULL) LIMIT 20"; String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"my_db\".\"my_name_validation_lp_yosulf\""; Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).containsValue(expectedDryRunNullValidationSql)); - Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUES).keySet().size()); - Assertions.assertTrue(operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).containsValue(expectedDryRunDatatypeValidationSql)); - Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.DATATYPE_CONVERSION).keySet().size()); + Assertions.assertEquals(expectedDryRunNullValidationSql, 
operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(1).getTwo()); + Assertions.assertEquals(2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).size()); Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); } @@ -672,7 +675,6 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() String expectedDryRunPreActionsSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; - String expectedDryRunDeleteSql = "DELETE FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf"; String expectedDryRunLoadSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + @@ -683,8 +685,7 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedDryRunPreActionsSql, operations.dryRunPreActionsSql().get(0)); - Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); - Assertions.assertEquals(expectedDryRunLoadSql, operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunLoadSql, operations.dryRunSql().get(0)); Assertions.assertTrue(operations.dryRunValidationSql().isEmpty()); Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); From c4252ebe0ca71cc6014314995244cd102754c70b Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 4 Mar 2024 16:49:33 +0800 Subject: [PATCH 20/32] Address comments --- .../components/relational/h2/H2Sink.java | 18 +++++-- .../visitor/MetadataFileNameFieldVisitor.java | 4 +- .../MetadataRowNumberFieldVisitor.java | 4 +- ...Column.java => MetadataFileNameValue.java} | 4 +- ...olumn.java => MetadataRowNumberValue.java} | 4 +- .../components/ingestmode/BulkLoadTest.java | 48 +++++++++++++++++-- 6 files changed, 67 insertions(+), 15 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/{MetadataFileNameColumn.java => MetadataFileNameValue.java} (92%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/{MetadataRowNumberColumn.java => MetadataRowNumberValue.java} (92%) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 50a8688dd00..16bef385f1a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -311,10 +311,7 @@ public List performDryRun(Datasets datasets, Transformer performDryRun(Datasets datasets, Transformer metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER," + - "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" DATETIME)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"" + + "(\"COL_INT\" INTEGER NOT NULL,\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" DATETIME)"; String expectedIngestSql = "COPY INTO \"MY_DB\".\"MY_NAME\" " + "(\"COL_INT\", \"COL_INTEGER\", \"DIGEST\", \"BATCH_ID\", \"APPEND_TIME\") " + "FROM " + @@ -347,6 +353,42 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() Assertions.assertNull(statsSql.get(ROWS_TERMINATED)); Assertions.assertNull(statsSql.get(ROWS_UPDATED)); Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"MY_DB\".\"MY_NAME\" as my_alias WHERE my_alias.\"BATCH_ID\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME')", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\"" + + "(\"COL_INT\" VARCHAR,\"COL_INTEGER\" VARCHAR,\"FILE\" VARCHAR,\"ROW_NUMBER\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf"; + + String expectedDryRunLoadSql = "COPY INTO \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" " + + "(\"COL_INT\", \"COL_INTEGER\", \"FILE\", \"ROW_NUMBER\") FROM " + + "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\",METADATA$FILENAME,METADATA$FILE_ROW_NUMBER + 1 " + + "FROM my_location as legend_persistence_stage) " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') FILE_FORMAT = (FORMAT_NAME = 'my_file_format') ON_ERROR = 'ABORT_STATEMENT'"; + + String expectedDryRunNullValidationSql = "SELECT MY_NAME_validation_lp_yosulf.\"COL_INT\",MY_NAME_validation_lp_yosulf.\"COL_INTEGER\",MY_NAME_validation_lp_yosulf.\"FILE\",MY_NAME_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf " + + "WHERE MY_NAME_validation_lp_yosulf.\"COL_INT\" IS NULL LIMIT 20"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT MY_NAME_validation_lp_yosulf.\"COL_INT\",MY_NAME_validation_lp_yosulf.\"COL_INTEGER\",MY_NAME_validation_lp_yosulf.\"FILE\",MY_NAME_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf " + + "WHERE (NOT 
(MY_NAME_validation_lp_yosulf.\"COL_INT\" IS NULL)) AND (TRY_CAST(MY_NAME_validation_lp_yosulf.\"COL_INT\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT MY_NAME_validation_lp_yosulf.\"COL_INT\",MY_NAME_validation_lp_yosulf.\"COL_INTEGER\",MY_NAME_validation_lp_yosulf.\"FILE\",MY_NAME_validation_lp_yosulf.\"ROW_NUMBER\" " + + "FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf " + + "WHERE (NOT (MY_NAME_validation_lp_yosulf.\"COL_INTEGER\" IS NULL)) AND (TRY_CAST(MY_NAME_validation_lp_yosulf.\"COL_INTEGER\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSql, operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(1).getTwo()); + Assertions.assertEquals(2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); } @Test From d5e7af2ef9f23026a0417166d1fb8c2ef40629a7 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 4 Mar 2024 16:54:11 +0800 Subject: [PATCH 21/32] Fix checkstyle --- .../engine/persistence/components/relational/h2/H2Sink.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 16bef385f1a..1d5fcec25b2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -343,7 +343,7 @@ private String extractProblematicValueFromErrorMessage(String errorMessage) } else if (errorMessage.contains("Cannot parse")) { - return errorMessage.replaceFirst("org.h2.jdbc.JdbcSQLDataException: Cannot parse \"(.*)\" constant " , "").replaceAll("\"", ""); + return errorMessage.replaceFirst("org.h2.jdbc.JdbcSQLDataException: Cannot parse \"(.*)\" constant ", "").replaceAll("\"", ""); } return errorMessage; } From ea167486afb196113f1654c6fea1af5a3b802a46 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 5 Mar 2024 13:41:49 +0800 Subject: [PATCH 22/32] Fix snowflake datatype check ---
.../relational/ansi/AnsiSqlSink.java | 31 +++++++++++++++ .../components/relational/h2/H2Sink.java | 27 ++----------- .../relational/snowflake/SnowflakeSink.java | 38 ++++++++++--------- 3 files changed, 54 insertions(+), 42 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 15cd1e209dc..1068d3b0de9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -162,11 +162,16 @@ import java.util.*; import java.util.stream.Collectors; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CHECK_CONSTRAINT; + public class AnsiSqlSink extends RelationalSink { private static final RelationalSink INSTANCE; protected static final Map, LogicalPlanVisitor> LOGICAL_PLAN_VISITOR_BY_CLASS; + protected static final String FILE_WITH_ERROR = "FILE"; + protected static final String ROW_NUMBER = "ROW_NUMBER"; + static { Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); @@ -349,6 +354,32 @@ protected Optional getLong(Map row, String key) return Optional.ofNullable(longValue); } + protected int findNullValuesDataErrors(Executor executor, List, SqlPlan>> queriesForNull, Map> dataErrorsByCategory, List allFields) + { + int errorsCount = 0; + for (org.eclipse.collections.api.tuple.Pair, SqlPlan> pair : queriesForNull) + { + List results = executor.executePhysicalPlanAndGetResults(pair.getTwo()); + if (!results.isEmpty()) + { + List> resultSets = results.get(0).getData(); + for (Map row : resultSets) + { + for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet())) + { + if (row.get(column) == null) + { + DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, CHECK_CONSTRAINT, column); + dataErrorsByCategory.get(CHECK_CONSTRAINT).add(dataError); + errorsCount++; + } + } + } + } + } + return errorsCount; + } + protected DataError constructDataError(List allColumns, Map row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) { return DataError.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 1d5fcec25b2..da921dc8abf 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -109,9 +109,6 @@ public class H2Sink extends AnsiSqlSink private static final Map> IMPLICIT_DATA_TYPE_MAPPING; private static final Map> EXPLICIT_DATA_TYPE_MAPPING; - private static final String FILE = "FILE"; - private static final String ROW_NUMBER = "ROW_NUMBER"; - static { Set capabilities = new HashSet<>(); @@ -280,26 +277,8 @@ public List performDryRun(Datasets datasets, Transformer, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(CONVERSION, new ArrayList<>()); // Execute queries for null values - for (Pair, SqlPlan> pair : queriesForNull) - { - List results = executor.executePhysicalPlanAndGetResults(pair.getTwo()); - if (!results.isEmpty()) - { - List> resultSets = results.get(0).getData(); - for (Map row : resultSets) - { - for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet())) - { - if (row.get(column) == null) - { - DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, CHECK_CONSTRAINT, column); - dataErrorsByCategory.get(CHECK_CONSTRAINT).add(dataError); - dataErrorsTotalCount++; - } - } - } - } - } + int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields); + dataErrorsTotalCount += nullValuesErrorsCount; // Execute queries for datatype conversion for (Pair, SqlPlan> pair : queriesForDatatype) @@ -322,7 +301,7 @@ public List performDryRun(Datasets datasets, Transformer> resultSets = results.get(0).getData(); for (Map row : resultSets) { - DataError dataError = constructDataError(allFields, row, FILE, ROW_NUMBER, CONVERSION, validatedColumn.fieldName()); + DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, CONVERSION, validatedColumn.fieldName()); dataErrorsByCategory.get(CONVERSION).add(dataError); dataErrorsTotalCount++; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 087bb80f32e..75297109f42 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -105,6 +105,8 @@ import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; +import static 
org.finos.legend.engine.persistence.components.util.ValidationCategory.CHECK_CONSTRAINT; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CONVERSION; public class SnowflakeSink extends AnsiSqlSink { @@ -123,15 +125,12 @@ public class SnowflakeSink extends AnsiSqlSink private static final String ERRORS_SEEN = "errors_seen"; private static final String FIRST_ERROR = "first_error"; private static final String FIRST_ERROR_COLUMN_NAME = "first_error_column_name"; - private static final String ERROR = "ERROR"; - private static final String FILE_WITH_ERROR = "FILE"; private static final String LINE = "LINE"; private static final String CHARACTER = "CHARACTER"; private static final String BYTE_OFFSET = "BYTE_OFFSET"; private static final String CATEGORY = "CATEGORY"; private static final String COLUMN_NAME = "COLUMN_NAME"; - private static final String ROW_NUMBER = "ROW_NUMBER"; private static final String ROW_START_LINE = "ROW_START_LINE"; private static final String REJECTED_RECORD = "REJECTED_RECORD"; @@ -314,25 +313,28 @@ private List performDryRunWithValidationQueries(Datasets datasets, Ex List allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); - for (ValidationCategory validationCategory : dryRunValidationSqlPlan.keySet()) + List, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(CHECK_CONSTRAINT, new ArrayList<>()); + List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(CONVERSION, new ArrayList<>()); + + // Execute queries for null values + int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields); + dataErrorsTotalCount += nullValuesErrorsCount; + + // Execute queries for datatype conversion + for (Pair, SqlPlan> pair : queriesForDatatype) { - for (Pair, SqlPlan> pair : dryRunValidationSqlPlan.get(validationCategory)) + List results = executor.executePhysicalPlanAndGetResults(pair.getTwo()); + if (!results.isEmpty()) { - List results = executor.executePhysicalPlanAndGetResults(pair.getTwo()); - if (!results.isEmpty()) + List> resultSets = results.get(0).getData(); + for (Map row : resultSets) { - List> resultSets = results.get(0).getData(); - for (Map row : resultSets) + // This loop will only be executed once as there is always only one element in the set + for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet())) { - for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet())) - { - if (row.get(column) == null) - { - DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, validationCategory, column); - dataErrorsByCategory.get(validationCategory).add(dataError); - dataErrorsTotalCount++; - } - } + DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, CONVERSION, column); + dataErrorsByCategory.get(CONVERSION).add(dataError); + dataErrorsTotalCount++; } } } From e7a0c8f011822b80e29f064bbcba1960b81c871e Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Tue, 5 Mar 2024 12:07:45 +0530 Subject: [PATCH 23/32] Clean up Batch Error --- .../components/relational/ansi/AnsiSqlSink.java | 2 +- .../util/DataErrorFairDistributionTest.java | 2 +- .../relational/api/DataErrorAbstract.java | 4 +--- .../ingestmode/bulkload/BulkLoadTest.java | 16 ++++++++-------- .../relational/snowflake/SnowflakeSink.java | 3 +-- 5 files changed, 12 insertions(+), 15 
deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 1068d3b0de9..7621e65d699 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -387,7 +387,7 @@ protected DataError constructDataError(List allColumns, Map getString(row, column).orElse("")).collect(Collectors.joining(","))) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java index 68430f504c6..18b7a172d29 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -109,7 +109,7 @@ private DataError getDummyDataError(ValidationCategory category, long rowNumber) return DataError.builder() .file("some_file_name") .errorCategory(category.getCategoryName()) - .rowNumber(rowNumber) + .recordNumber(rowNumber) .columnName("some_column_name") .rejectedRecord("some_data") .errorMessage("some_error_message") diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java index 1c41520ae06..1e1c0c701cd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java @@ -40,9 +40,7 @@ public interface DataErrorAbstract Optional characterPosition(); - Optional rowNumber(); - - Optional rowStartLineNumber(); + Optional recordNumber(); Optional 
rejectedRecord(); } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 95ffd309cab..251ec4fc5ee 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -966,35 +966,35 @@ public void testBulkLoadDryRunFailure() List expectedErrorRecords = Arrays.asList(DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .rowNumber(1L) + .recordNumber(1L) .columnName(col3NonNullable.name()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .rowNumber(2L) + .recordNumber(2L) .columnName(col2NonNullable.name()) .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .rowNumber(1L) + .recordNumber(1L) .columnName(col1.name()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .rowNumber(1L) + .recordNumber(1L) .columnName(col4.name()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .rowNumber(2L) + .recordNumber(2L) .columnName(col3.name()) .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Unable to type cast column") @@ -1113,21 +1113,21 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() List expectedErrorRecords = Arrays.asList(DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .rowNumber(1L) + .recordNumber(1L) .columnName(col3NonNullable.name().toUpperCase()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .rowNumber(2L) + .recordNumber(2L) .columnName(col2NonNullable.name().toUpperCase()) .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") .build(), DataError.builder() .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .rowNumber(1L) + .recordNumber(1L) .columnName(col1.name().toUpperCase()) .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 75297109f42..a2e35ba304e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -290,8 +290,7 @@ private List performDryRunWithValidationMode(Executor Date: Wed, 6 Mar 2024 13:32:25 +0800 Subject: [PATCH 24/32] Add error category --- .../relational/api/ErrorCategory.java | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java new file mode 100644 index 00000000000..c42997ec298 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java @@ -0,0 +1,25 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.api; + +public enum ErrorCategory +{ + TYPE_CONVERSION, + CHECK_NULL_CONSTRAINT, + CHECK_OTHER_CONSTRAINT, + PARSING_ERROR, + FILE_NOT_FOUND, + UNKNOWN +} \ No newline at end of file From 1d2497ed835860fe21e4875349f4403bb1390680 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Wed, 6 Mar 2024 12:46:19 +0530 Subject: [PATCH 25/32] Clean up interface for Data Error --- .../relational/ansi/AnsiSqlSink.java | 16 ++++-- .../util/DataErrorFairDistributionTest.java | 11 ++-- .../relational/api/DataErrorAbstract.java | 22 +++---- .../ingestmode/bulkload/BulkLoadTest.java | 57 ++++++++----------- .../relational/snowflake/SnowflakeSink.java | 12 ++-- 5 files changed, 62 insertions(+), 56 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 7621e65d699..2278dd39914 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -382,16 +382,24 @@ protected int findNullValuesDataErrors(Executor ex protected DataError constructDataError(List allColumns, Map row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) { + Map errorDetails = buildErrorDetails(getString(row, fileNameColumnName), Optional.of(validatedColumnName), getLong(row, rowNumberColumnName)); return DataError.builder() .errorMessage(getValidationFailedErrorMessage(validationCategory)) - .file(getString(row, fileNameColumnName).orElseThrow(IllegalStateException::new)) .errorCategory(validationCategory.getCategoryName()) - .columnName(validatedColumnName) - .recordNumber(getLong(row, rowNumberColumnName)) - .rejectedRecord(allColumns.stream().map(column -> getString(row, column).orElse("")).collect(Collectors.joining(","))) + .putAllErrorDetails(errorDetails) + .errorRecord(allColumns.stream().map(column -> getString(row, column).orElse("")).collect(Collectors.joining(","))) .build(); } + protected Map buildErrorDetails(Optional fileName, Optional columnName, Optional recordNumber) + { + Map errorDetails = new HashMap<>(); + fileName.ifPresent(file -> errorDetails.put(DataError.FILE_NAME, file)); + columnName.ifPresent(col -> errorDetails.put(DataError.COLUMN_NAME, col)); + recordNumber.ifPresent(rowNum -> errorDetails.put(DataError.RECORD_NUMBER, rowNum)); + return errorDetails; + } + private String getValidationFailedErrorMessage(ValidationCategory category) { switch (category) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java index 18b7a172d29..25a57c096fa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -106,12 +106,15 @@ private void populateDataErrors(ValidationCategory category, int totalCount, int private DataError getDummyDataError(ValidationCategory category, long rowNumber) { + Map errorDetails = new HashMap<>(); + errorDetails.put(DataError.FILE_NAME, "some_file_name"); + errorDetails.put(DataError.RECORD_NUMBER, rowNumber); + errorDetails.put(DataError.COLUMN_NAME, "some_column_name"); + return DataError.builder() - .file("some_file_name") .errorCategory(category.getCategoryName()) - .recordNumber(rowNumber) - .columnName("some_column_name") - .rejectedRecord("some_data") + .putAllErrorDetails(errorDetails) + .errorRecord("some_data") .errorMessage("some_error_message") .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java index 1e1c0c701cd..dd151897075 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java @@ -16,6 +16,7 @@ import org.immutables.value.Value; +import java.util.Map; import java.util.Optional; @Value.Immutable @@ -28,19 +29,20 @@ ) public interface DataErrorAbstract { - String errorMessage(); - - String file(); - - String errorCategory(); - Optional columnName(); + public static final String FILE_NAME = "file"; + public static final String LINE_NUMBER = "line_number"; + public static final String RECORD_NUMBER = "record_number"; + public static final String COLUMN_NAME = "column_name"; + public static final String CHARACTER_POSITION = "character_position"; + public static final String NUM_DUPLICATES = "num_duplicates"; + public static final String NUM_DATA_VERSION_ERRORS = "num_data_version_errors"; - Optional lineNumber(); + String errorMessage(); - Optional characterPosition(); + String errorCategory(); - Optional recordNumber(); + Optional errorRecord(); - Optional rejectedRecord(); + Map errorDetails(); } \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 251ec4fc5ee..44751118586 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -964,40 +964,30 @@ public void testBulkLoadDryRunFailure() DryRunResult dryRunResult = ingestor.dryRun(); List expectedErrorRecords = Arrays.asList(DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .recordNumber(1L) - .columnName(col3NonNullable.name()) - .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name(), 1L)) .build(), DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .recordNumber(2L) - .columnName(col2NonNullable.name()) - .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") + .errorRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name(), 2L)) .build(), DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .recordNumber(1L) - .columnName(col1.name()) - .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col1.name(), 1L)) .build(), DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .recordNumber(1L) - .columnName(col4.name()) - .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col4.name(), 1L)) .build(), DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .recordNumber(2L) - .columnName(col3.name()) - .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") + .errorRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col3.name(), 2L)) .build()); Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); @@ -1111,26 +1101,20 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() DryRunResult dryRunResult = ingestor.dryRun(); List expectedErrorRecords = Arrays.asList(DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .recordNumber(1L) - .columnName(col3NonNullable.name().toUpperCase()) - .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorRecord("??,Andy,,2022-01-99 
00:00:00.0") .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name(), 1L)) .build(), DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .recordNumber(2L) - .columnName(col2NonNullable.name().toUpperCase()) - .rejectedRecord("2,,NaN,2022-01-12 00:00:00.0") + .errorRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name(), 2L)) .build(), DataError.builder() - .file(filePath) .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .recordNumber(1L) - .columnName(col1.name().toUpperCase()) - .rejectedRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col1.name(), 1L)) .build()); Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); @@ -1241,4 +1225,13 @@ private void verifyBulkLoadMetadataForUpperCase(Map appendMetada } } + private Map buildErrorDetails(String fileName, String columnName,Long recordNumber) + { + Map errorDetails = new HashMap<>(); + errorDetails.put(DataError.FILE_NAME, fileName); + errorDetails.put(DataError.COLUMN_NAME, columnName); + errorDetails.put(DataError.RECORD_NUMBER, recordNumber); + return errorDetails; + } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index a2e35ba304e..604641ec90c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -283,15 +283,15 @@ private List performDryRunWithValidationMode(Executor> resultSets = results.get(0).getData(); for (Map row : resultSets) { + Map errorDetails = buildErrorDetails(getString(row, FILE_WITH_ERROR), getString(row, COLUMN_NAME), getLong(row, ROW_NUMBER)); + getLong(row, LINE).ifPresent(line -> errorDetails.put(DataError.LINE_NUMBER, line)); + getLong(row, CHARACTER).ifPresent(characterPos -> errorDetails.put(DataError.CHARACTER_POSITION, characterPos)); + DataError dataError = DataError.builder() .errorMessage(getString(row, ERROR).orElseThrow(IllegalStateException::new)) - .file(getString(row, FILE_WITH_ERROR).orElseThrow(IllegalStateException::new)) .errorCategory(getString(row, CATEGORY).orElseThrow(IllegalStateException::new)) - .columnName(getString(row, COLUMN_NAME)) - .lineNumber(getLong(row, LINE)) - .characterPosition(getLong(row, CHARACTER)) - .recordNumber(getLong(row, ROW_NUMBER)) - .rejectedRecord(getString(row, REJECTED_RECORD)) + .putAllErrorDetails(errorDetails) + .errorRecord(getString(row, REJECTED_RECORD)) .build(); dataErrors.add(dataError); } From 
39664d9e745fdce34f8de40000809f272fd88752 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 6 Mar 2024 15:26:43 +0800 Subject: [PATCH 26/32] Fix test --- .../components/ingestmode/bulkload/BulkLoadTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 44751118586..a14b738626c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -1104,17 +1104,17 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) .errorRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Null values found in non-nullable column") - .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name(), 1L)) + .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name().toUpperCase(), 1L)) .build(), DataError.builder() .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) .errorRecord("2,,NaN,2022-01-12 00:00:00.0") .errorMessage("Null values found in non-nullable column") - .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name(), 2L)) + .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name().toUpperCase(), 2L)) .build(), DataError.builder() .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) .errorRecord("??,Andy,,2022-01-99 00:00:00.0") .errorMessage("Unable to type cast column") - .putAllErrorDetails(buildErrorDetails(filePath, col1.name(), 1L)) + .putAllErrorDetails(buildErrorDetails(filePath, col1.name().toUpperCase(), 1L)) .build()); Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); @@ -1225,7 +1225,7 @@ private void verifyBulkLoadMetadataForUpperCase(Map appendMetada } } - private Map buildErrorDetails(String fileName, String columnName,Long recordNumber) + private Map buildErrorDetails(String fileName, String columnName, Long recordNumber) { Map errorDetails = new HashMap<>(); errorDetails.put(DataError.FILE_NAME, fileName); From e880452979b21b69d3325d4990bfb917239940d0 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 7 Mar 2024 16:45:27 +0800 Subject: [PATCH 27/32] Add exception handling and adapt to new DataError model --- .../MetadataRowNumberFieldAbstract.java | 3 + .../components/planner/BulkLoadPlanner.java | 12 +- .../components/util/ValidationCategory.java | 16 +- .../relational/ansi/AnsiSqlSink.java | 97 ++++++-- .../util/DataErrorFairDistributionTest.java | 47 ++-- .../components/relational/RelationalSink.java | 2 +- .../relational/api/ErrorCategory.java | 24 +- .../api/RelationalIngestorAbstract.java | 2 +- .../components/relational/h2/H2Sink.java | 51 +++- .../ingestmode/bulkload/BulkLoadTest.java | 222 ++++++++++++++---- .../pom.xml | 8 + 
.../relational/snowflake/SnowflakeSink.java | 168 +++++++++++-- .../MetadataRowNumberFieldVisitor.java | 17 +- .../values/MetadataRowNumberValue.java | 13 +- .../components/ingestmode/BulkLoadTest.java | 42 ++-- .../SnowflakeRejectedRecordParserTest.java | 129 ++++++++++ 16 files changed, 684 insertions(+), 169 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/SnowflakeRejectedRecordParserTest.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java index 3e4e96d750c..23f0e7fdfa6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.logicalplan.values; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; + import static org.immutables.value.Value.Immutable; import static org.immutables.value.Value.Style; @@ -27,4 +29,5 @@ ) public interface MetadataRowNumberFieldAbstract extends Value { + StagedFilesDatasetProperties stagedFilesDatasetProperties(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index b605d14f487..9a9b9b57a06 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -75,8 +75,8 @@ class BulkLoadPlanner extends Planner private Dataset validationDataset; private StagedFilesDataset stagedFilesDataset; - private static final String FILE = "FILE"; - private static final String ROW_NUMBER = "ROW_NUMBER"; + private static final String FILE = "legend_persistence_file"; + private static final String ROW_NUMBER = "legend_persistence_row_number"; BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions, Set capabilities) { @@ -181,7 +181,7 @@ public LogicalPlan buildLogicalPlanForDryRun(Resources resources) List fieldsToSelect = 
LogicalPlanUtils.extractStagedFilesFieldValuesWithVarCharType(stagingDataset()); fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); - fieldsToSelect.add(MetadataRowNumberField.builder().build()); + fieldsToSelect.add(MetadataRowNumberField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build()); List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); fieldsToInsert.add(FieldValue.builder().fieldName(FILE).datasetRef(stagingDataset().datasetReference()).build()); @@ -241,14 +241,14 @@ public Map, LogicalPlan>>> buildLo .limit(options().sampleRowCount()) .build(); - validationMap.put(ValidationCategory.CHECK_CONSTRAINT, + validationMap.put(ValidationCategory.NULL_VALUE, Collections.singletonList(Tuples.pair(fieldsToCheckForNull.stream().map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), LogicalPlan.of(Collections.singletonList(queryForNull))))); } if (!fieldsToCheckForDatatype.isEmpty()) { - validationMap.put(ValidationCategory.CONVERSION, new ArrayList<>()); + validationMap.put(ValidationCategory.TYPE_CONVERSION, new ArrayList<>()); for (Field fieldToCheckForDatatype : fieldsToCheckForDatatype) { @@ -261,7 +261,7 @@ public Map, LogicalPlan>>> buildLo .limit(options().sampleRowCount()) .build(); - validationMap.get(ValidationCategory.CONVERSION).add(Tuples.pair(Stream.of(fieldToCheckForDatatype).map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), + validationMap.get(ValidationCategory.TYPE_CONVERSION).add(Tuples.pair(Stream.of(fieldToCheckForDatatype).map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()), LogicalPlan.of(Collections.singletonList(queryForDatatype)))); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java index d87d6c44832..c2fbb123b81 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java @@ -16,18 +16,6 @@ public enum ValidationCategory { - CHECK_CONSTRAINT("check_constraint"), - CONVERSION("conversion"); - - private final String categoryName; - - ValidationCategory(String categoryName) - { - this.categoryName = categoryName; - } - - public String getCategoryName() - { - return this.categoryName; - } + NULL_VALUE, + TYPE_CONVERSION } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 2278dd39914..2c9ba6c5818 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.relational.ansi; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; @@ -148,6 +150,7 @@ import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.TruncateVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.WindowFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; @@ -162,15 +165,15 @@ import java.util.*; import java.util.stream.Collectors; -import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CHECK_CONSTRAINT; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUE; public class AnsiSqlSink extends RelationalSink { private static final RelationalSink INSTANCE; protected static final Map, LogicalPlanVisitor> LOGICAL_PLAN_VISITOR_BY_CLASS; - protected static final String FILE_WITH_ERROR = "FILE"; - protected static final String ROW_NUMBER = "ROW_NUMBER"; + private static final String FILE = "legend_persistence_file"; + private static final String ROW_NUMBER = "legend_persistence_row_number"; static { @@ -335,7 +338,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) { throw new UnsupportedOperationException("DryRun not supported!"); } @@ -354,7 +357,23 @@ protected Optional getLong(Map row, String key) return Optional.ofNullable(longValue); } - protected int findNullValuesDataErrors(Executor executor, List, SqlPlan>> queriesForNull, Map> dataErrorsByCategory, List allFields) + protected Optional getChar(Map row, String key) + { + Object value = row.get(key); + if (value instanceof Character) + { + Character charValue = value == null ? 
null : (Character) value; + return Optional.ofNullable(charValue); + } + if (value instanceof String) + { + Optional stringValue = getString(row, key); + return stringValue.map(s -> s.charAt(0)); + } + return Optional.empty(); + } + + protected int findNullValuesDataErrors(Executor executor, List, SqlPlan>> queriesForNull, Map> dataErrorsByCategory, List allFields, CaseConversion caseConversion) { int errorsCount = 0; for (org.eclipse.collections.api.tuple.Pair, SqlPlan> pair : queriesForNull) @@ -369,8 +388,8 @@ protected int findNullValuesDataErrors(Executor ex { if (row.get(column) == null) { - DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, CHECK_CONSTRAINT, column); - dataErrorsByCategory.get(CHECK_CONSTRAINT).add(dataError); + DataError dataError = constructDataError(allFields, row, NULL_VALUE, column, caseConversion); + dataErrorsByCategory.get(NULL_VALUE).add(dataError); errorsCount++; } } @@ -380,14 +399,33 @@ protected int findNullValuesDataErrors(Executor ex return errorsCount; } - protected DataError constructDataError(List allColumns, Map row, String fileNameColumnName, String rowNumberColumnName, ValidationCategory validationCategory, String validatedColumnName) + protected DataError constructDataError(List allColumns, Map row, ValidationCategory validationCategory, String validatedColumnName, CaseConversion caseConversion) { - Map errorDetails = buildErrorDetails(getString(row, fileNameColumnName), Optional.of(validatedColumnName), getLong(row, rowNumberColumnName)); + ErrorCategory errorCategory = getValidationFailedErrorCategory(validationCategory); + + String fileColumnName; + String rowNumberColumnName; + switch (caseConversion) + { + case TO_UPPER: + fileColumnName = FILE.toUpperCase(); + rowNumberColumnName = ROW_NUMBER.toUpperCase(); + break; + case TO_LOWER: + fileColumnName = FILE.toLowerCase(); + rowNumberColumnName = ROW_NUMBER.toLowerCase(); + break; + default: + fileColumnName = FILE; + rowNumberColumnName = ROW_NUMBER; + } + Map errorDetails = buildErrorDetails(getString(row, fileColumnName), Optional.of(validatedColumnName), getLong(row, rowNumberColumnName)); + return DataError.builder() - .errorMessage(getValidationFailedErrorMessage(validationCategory)) - .errorCategory(validationCategory.getCategoryName()) + .errorMessage(errorCategory.getDefaultErrorMessage()) + .errorCategory(errorCategory.name()) .putAllErrorDetails(errorDetails) - .errorRecord(allColumns.stream().map(column -> getString(row, column).orElse("")).collect(Collectors.joining(","))) + .errorRecord(buildErrorRecord(allColumns, row)) .build(); } @@ -400,14 +438,37 @@ protected Map buildErrorDetails(Optional fileName, Optio return errorDetails; } - private String getValidationFailedErrorMessage(ValidationCategory category) + protected String buildErrorRecord(List allColumns, Map row) + { + Map errorRecordMap = new HashMap<>(); + + for (String column : allColumns) + { + if (row.containsKey(column)) + { + errorRecordMap.put(column, row.get(column)); + } + } + + ObjectMapper objectMapper = new ObjectMapper(); + try + { + return objectMapper.writeValueAsString(errorRecordMap); + } + catch (JsonProcessingException e) + { + throw new RuntimeException(e); + } + } + + private ErrorCategory getValidationFailedErrorCategory(ValidationCategory validationCategory) { - switch (category) + switch (validationCategory) { - case CHECK_CONSTRAINT: - return "Null values found in non-nullable column"; - case CONVERSION: - return "Unable to type cast column"; + case 
NULL_VALUE: + return ErrorCategory.CHECK_NULL_CONSTRAINT; + case TYPE_CONVERSION: + return ErrorCategory.TYPE_CONVERSION; default: throw new IllegalStateException("Unsupported validation category"); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java index 25a57c096fa..31e1fa04d13 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -35,18 +36,18 @@ public void testTotalErrorsSmallerThanSampleRowCount() AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); Map> dataErrorsByCategory = new HashMap<>(); - dataErrorsByCategory.put(ValidationCategory.CHECK_CONSTRAINT, new LinkedList<>()); - dataErrorsByCategory.put(ValidationCategory.CONVERSION, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUE, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.TYPE_CONVERSION, new LinkedList<>()); List expectedNullValuesErrors = new ArrayList<>(); List expectedDatatypeErrors = new ArrayList<>(); - populateDataErrors(ValidationCategory.CHECK_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); - populateDataErrors(ValidationCategory.CONVERSION, 5, 5, dataErrorsByCategory, expectedDatatypeErrors); + populateDataErrors(ValidationCategory.NULL_VALUE, ErrorCategory.CHECK_NULL_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.TYPE_CONVERSION, ErrorCategory.TYPE_CONVERSION, 5, 5, dataErrorsByCategory, expectedDatatypeErrors); List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 10, dataErrorsByCategory); Assertions.assertEquals(10, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CHECK_CONSTRAINT.getCategoryName())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CONVERSION.getCategoryName())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION.name())).collect(Collectors.toList())); } @Test @@ -55,18 +56,18 @@ 
public void testExhaustingOneCategory() AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); Map> dataErrorsByCategory = new HashMap<>(); - dataErrorsByCategory.put(ValidationCategory.CHECK_CONSTRAINT, new LinkedList<>()); - dataErrorsByCategory.put(ValidationCategory.CONVERSION, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUE, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.TYPE_CONVERSION, new LinkedList<>()); List expectedNullValuesErrors = new ArrayList<>(); List expectedDatatypeErrors = new ArrayList<>(); - populateDataErrors(ValidationCategory.CHECK_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); - populateDataErrors(ValidationCategory.CONVERSION, 50, 15, dataErrorsByCategory, expectedDatatypeErrors); + populateDataErrors(ValidationCategory.NULL_VALUE, ErrorCategory.CHECK_NULL_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.TYPE_CONVERSION, ErrorCategory.TYPE_CONVERSION, 50, 15, dataErrorsByCategory, expectedDatatypeErrors); List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 55, dataErrorsByCategory); Assertions.assertEquals(20, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CHECK_CONSTRAINT.getCategoryName())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CONVERSION.getCategoryName())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION.name())).collect(Collectors.toList())); } @Test @@ -75,27 +76,27 @@ public void testExhaustingBothCategories() AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); Map> dataErrorsByCategory = new HashMap<>(); - dataErrorsByCategory.put(ValidationCategory.CHECK_CONSTRAINT, new LinkedList<>()); - dataErrorsByCategory.put(ValidationCategory.CONVERSION, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUE, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.TYPE_CONVERSION, new LinkedList<>()); List expectedNullValuesErrors = new ArrayList<>(); List expectedDatatypeErrors = new ArrayList<>(); - populateDataErrors(ValidationCategory.CHECK_CONSTRAINT, 15, 10, dataErrorsByCategory, expectedNullValuesErrors); - populateDataErrors(ValidationCategory.CONVERSION, 20, 9, dataErrorsByCategory, expectedDatatypeErrors); + populateDataErrors(ValidationCategory.NULL_VALUE, ErrorCategory.CHECK_NULL_CONSTRAINT, 15, 10, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.TYPE_CONVERSION, ErrorCategory.TYPE_CONVERSION, 20, 9, dataErrorsByCategory, expectedDatatypeErrors); List results = sink.getDataErrorsWithFairDistributionAcrossCategories(19, 35, dataErrorsByCategory); Assertions.assertEquals(19, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ValidationCategory.CHECK_CONSTRAINT.getCategoryName())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> 
error.errorCategory().equals(ValidationCategory.CONVERSION.getCategoryName())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION.name())).collect(Collectors.toList())); } - private void populateDataErrors(ValidationCategory category, int totalCount, int expectedCount, Map> dataErrorsByCategory, List expectedList) + private void populateDataErrors(ValidationCategory validationCategory, ErrorCategory errorCategory, int totalCount, int expectedCount, Map> dataErrorsByCategory, List expectedList) { int count = 1; while (count <= totalCount) { - DataError dataError = getDummyDataError(category, count); - dataErrorsByCategory.get(category).add(dataError); + DataError dataError = getDummyDataError(errorCategory, count); + dataErrorsByCategory.get(validationCategory).add(dataError); if (count <= expectedCount) { expectedList.add(dataError); @@ -104,7 +105,7 @@ private void populateDataErrors(ValidationCategory category, int totalCount, int } } - private DataError getDummyDataError(ValidationCategory category, long rowNumber) + private DataError getDummyDataError(ErrorCategory category, long rowNumber) { Map errorDetails = new HashMap<>(); errorDetails.put(DataError.FILE_NAME, "some_file_name"); @@ -112,7 +113,7 @@ private DataError getDummyDataError(ValidationCategory category, long rowNumber) errorDetails.put(DataError.COLUMN_NAME, "some_column_name"); return DataError.builder() - .errorCategory(category.getCategoryName()) + .errorCategory(category.name()) .putAllErrorDetails(errorDetails) .errorRecord("some_data") .errorMessage("some_error_message") diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index 2b4723eab4f..17e1332f023 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -195,5 +195,5 @@ public interface ConstructDatasetFromDatabase public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues); - public abstract List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, org.finos.legend.engine.persistence.components.relational.SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount); + public abstract List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, org.finos.legend.engine.persistence.components.relational.SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion); } 
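The CaseConversion parameter added to performDryRun above is what lets each sink resolve the FILE and ROW_NUMBER helper columns in the sampled validation rows: the dry-run validation table is created with the dataset's case conversion applied, so the constant column names must be converted the same way before the row lookup (this is the switch added to constructDataError earlier in this patch). A minimal, self-contained sketch of that rule; the enum and the literal column name below are local stand-ins for the library's CaseConversion and column constants, not the patch's classes:

    import java.util.Locale;

    public class CaseConversionSketch
    {
        // Stand-in for org.finos.legend.engine.persistence.components.relational.CaseConversion
        enum CaseConversion { TO_UPPER, TO_LOWER, NONE }

        // Mirrors the switch in constructDataError: convert the constant name the same
        // way the dry-run table's columns were converted. Locale.ROOT is used here only
        // to keep the sketch deterministic across default locales.
        static String convertCase(CaseConversion conversion, String columnName)
        {
            switch (conversion)
            {
                case TO_UPPER:
                    return columnName.toUpperCase(Locale.ROOT);
                case TO_LOWER:
                    return columnName.toLowerCase(Locale.ROOT);
                default:
                    return columnName;
            }
        }

        public static void main(String[] args)
        {
            // Prints LEGEND_PERSISTENCE_ROW_NUMBER, the key an upper-case dataset
            // exposes in each sampled row (see the upper-case BulkLoadTest below).
            System.out.println(convertCase(CaseConversion.TO_UPPER, "legend_persistence_row_number"));
        }
    }

Patch 28 later in this series centralizes this switch as a shared helper (ApiUtils.convertCase), so the individual sinks no longer duplicate it.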
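The hunks above also change how the offending row itself is reported: buildErrorRecord keeps only the dataset's own columns from the sampled row and serializes them as a JSON object keyed by column name, which is the shape the BulkLoadTest expectations later in this patch assert against (strings like {"col_int":"??",...}). A self-contained sketch of the same idea, assuming only jackson-databind on the classpath; the patch's version uses a HashMap, so its key order is unspecified (visible in the reordered JSON of the upper-case test), while this sketch uses a LinkedHashMap purely to make the printed output deterministic:

    import com.fasterxml.jackson.core.JsonProcessingException;
    import com.fasterxml.jackson.databind.ObjectMapper;

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class ErrorRecordSketch
    {
        // Keep only the dataset's declared columns (dropping helper columns such as
        // the file name and row number) and serialize the rest as a JSON object.
        static String buildErrorRecord(List<String> allColumns, Map<String, Object> row) throws JsonProcessingException
        {
            Map<String, Object> errorRecordMap = new LinkedHashMap<>();
            for (String column : allColumns)
            {
                if (row.containsKey(column))
                {
                    errorRecordMap.put(column, row.get(column));
                }
            }
            return new ObjectMapper().writeValueAsString(errorRecordMap);
        }

        public static void main(String[] args) throws JsonProcessingException
        {
            Map<String, Object> row = new HashMap<>();
            row.put("col_int", "??");
            row.put("col_decimal", null);
            row.put("legend_persistence_row_number", 1L); // helper column, filtered out
            // Prints {"col_int":"??","col_decimal":null}
            System.out.println(buildErrorRecord(Arrays.asList("col_int", "col_decimal"), row));
        }
    }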
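The hunk that follows turns ErrorCategory from a bare enum into one that carries a default human-readable message, so constructDataError can fill errorCategory from the enum name and errorMessage from the default text. A hedged sketch of the pattern with two of the constants (the full enum in the patch has six):

    public class ErrorCategorySketch
    {
        // Same constructor-with-message pattern as the ErrorCategory hunk below.
        enum ErrorCategory
        {
            TYPE_CONVERSION("Unable to type cast column"),
            CHECK_NULL_CONSTRAINT("Null values found in non-nullable column");

            private final String defaultErrorMessage;

            ErrorCategory(String defaultErrorMessage)
            {
                this.defaultErrorMessage = defaultErrorMessage;
            }

            String getDefaultErrorMessage()
            {
                return defaultErrorMessage;
            }
        }

        public static void main(String[] args)
        {
            ErrorCategory category = ErrorCategory.CHECK_NULL_CONSTRAINT;
            // errorCategory gets the enum name, errorMessage its default message
            System.out.println(category.name() + ": " + category.getDefaultErrorMessage());
        }
    }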
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java index c42997ec298..175a28f612c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java @@ -16,10 +16,22 @@ public enum ErrorCategory { - TYPE_CONVERSION, - CHECK_NULL_CONSTRAINT, - CHECK_OTHER_CONSTRAINT, - PARSING_ERROR, - FILE_NOT_FOUND, - UNKNOWN + TYPE_CONVERSION("Unable to type cast column"), + CHECK_NULL_CONSTRAINT("Null values found in non-nullable column"), // TODO: shall we change to exactly the same as snowflake's error message for null? + CHECK_OTHER_CONSTRAINT("Table constraints not fulfilled"), + PARSING_ERROR("Unable to parse file"), + FILE_NOT_FOUND("File not found in specified location"), + UNKNOWN("Unknown error"); + + private final String defaultErrorMessage; + + ErrorCategory(String defaultErrorMessage) + { + this.defaultErrorMessage = defaultErrorMessage; + } + + public String getDefaultErrorMessage() + { + return defaultErrorMessage; + } } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 3e0e8a8538a..1e7ee1e1b7c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -541,7 +541,7 @@ private List performDryRun() if (enrichedIngestMode instanceof BulkLoad) { executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); - List results = relationalSink().performDryRun(enrichedDatasets, transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount()); + List results = relationalSink().performDryRun(enrichedDatasets, transformer, executor, generatorResult.dryRunSqlPlan(), generatorResult.dryRunValidationSqlPlan(), sampleRowCount(), caseConversion()); executor.executePhysicalPlan(generatorResult.dryRunPostCleanupSqlPlan()); return results; } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index da921dc8abf..d205e329cf0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -49,6 +49,7 @@ import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.LowerCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; @@ -97,8 +98,8 @@ import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; -import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CONVERSION; -import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CHECK_CONSTRAINT; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.TYPE_CONVERSION; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUE; public class H2Sink extends AnsiSqlSink { @@ -260,7 +261,33 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) + { + try + { + return performDryRunWithValidationQueries(datasets, transformer, executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount, caseConversion); + } + catch (Exception e) + { + return parseH2Exceptions(e); + } + } + + private List parseH2Exceptions(Exception e) + { + String errorMessage = e.getMessage(); + + if (errorMessage.contains("IO Exception")) + { + String fileName = extractProblematicValueFromErrorMessage(errorMessage); + Map errorDetails = buildErrorDetails(Optional.of(fileName), Optional.empty(), Optional.empty()); + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND.name()).errorMessage(ErrorCategory.FILE_NOT_FOUND.getDefaultErrorMessage()).putAllErrorDetails(errorDetails).build()); + } + + return 
Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN.name()).errorMessage(errorMessage).build()); + } + + public List performDryRunWithValidationQueries(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) { executor.executePhysicalPlan(dryRunSqlPlan); @@ -273,11 +300,11 @@ public List performDryRun(Datasets datasets, Transformer allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); - List, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(CHECK_CONSTRAINT, new ArrayList<>()); - List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(CONVERSION, new ArrayList<>()); + List, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUE, new ArrayList<>()); + List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(TYPE_CONVERSION, new ArrayList<>()); // Execute queries for null values - int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields); + int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion); dataErrorsTotalCount += nullValuesErrorsCount; // Execute queries for datatype conversion @@ -289,8 +316,7 @@ public List performDryRun(Datasets datasets, Transformer performDryRun(Datasets datasets, Transformer> resultSets = results.get(0).getData(); for (Map row : resultSets) { - DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, CONVERSION, validatedColumn.fieldName()); - dataErrorsByCategory.get(CONVERSION).add(dataError); + DataError dataError = constructDataError(allFields, row, TYPE_CONVERSION, validatedColumn.fieldName(), caseConversion); + dataErrorsByCategory.get(TYPE_CONVERSION).add(dataError); dataErrorsTotalCount++; } } @@ -316,6 +342,7 @@ public List performDryRun(Datasets datasets, Transformer expectedErrorRecords = Arrays.asList(DataError.builder() - .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .errorRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name(), 1L)) .build(), DataError.builder() - .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .errorRecord("2,,NaN,2022-01-12 00:00:00.0") + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorRecord("{\"col_int\":\"2\",\"col_decimal\":\"NaN\",\"col_string\":null,\"col_datetime\":\"2022-01-12 00:00:00.0\"}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name(), 2L)) .build(), DataError.builder() - .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .errorRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") .errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col1.name(), 1L)) .build(), DataError.builder() - 
.errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .errorRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") .errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col4.name(), 1L)) .build(), DataError.builder() - .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .errorRecord("2,,NaN,2022-01-12 00:00:00.0") + .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorRecord("{\"col_int\":\"2\",\"col_decimal\":\"NaN\",\"col_string\":null,\"col_datetime\":\"2022-01-12 00:00:00.0\"}") .errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col3.name(), 2L)) .build()); @@ -1053,28 +1054,28 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() // Checking dry run String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\"" + - "(\"COL_INT\" VARCHAR,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" VARCHAR,\"COL_DATETIME\" VARCHAR,\"FILE\" VARCHAR,\"ROW_NUMBER\" BIGINT)"; + "(\"COL_INT\" VARCHAR,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" VARCHAR,\"COL_DATETIME\" VARCHAR,\"LEGEND_PERSISTENCE_FILE\" VARCHAR,\"LEGEND_PERSISTENCE_ROW_NUMBER\" BIGINT)"; String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf"; String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" " + - "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"FILE\", \"ROW_NUMBER\") " + + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"LEGEND_PERSISTENCE_FILE\", \"LEGEND_PERSISTENCE_ROW_NUMBER\") " + "SELECT CONVERT(\"COL_INT\",VARCHAR),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",VARCHAR),CONVERT(\"COL_DATETIME\",VARCHAR),'src/test/resources/data/bulk-load/input/bad_file.csv',ROW_NUMBER() OVER () " + "FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; - String expectedDryRunNullValidationSql = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + String expectedDryRunNullValidationSql = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + "WHERE (MAIN_validation_lp_yosulf.\"COL_STRING\" IS NULL) OR (MAIN_validation_lp_yosulf.\"COL_DECIMAL\" IS NULL) LIMIT 3"; - String expectedDryRunDatatypeValidationSql1 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + String expectedDryRunDatatypeValidationSql1 = "SELECT 
MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_INT\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_INT\" AS INTEGER) IS NULL) LIMIT 3"; - String expectedDryRunDatatypeValidationSql2 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + String expectedDryRunDatatypeValidationSql2 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_DECIMAL\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_DECIMAL\" AS DECIMAL(5,2)) IS NULL) LIMIT 3"; - String expectedDryRunDatatypeValidationSql3 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"FILE\",MAIN_validation_lp_yosulf.\"ROW_NUMBER\" " + + String expectedDryRunDatatypeValidationSql3 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_DATETIME\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_DATETIME\" AS TIMESTAMP) IS NULL) LIMIT 3"; @@ -1083,12 +1084,12 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); - Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).get(0).getTwo()); - Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.CHECK_CONSTRAINT).size()); - Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(0).getTwo()); - Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(1).getTwo()); - Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).get(2).getTwo()); - Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.CONVERSION).size()); + Assertions.assertEquals(expectedDryRunNullValidationSql, 
operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); @@ -1101,18 +1102,18 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() DryRunResult dryRunResult = ingestor.dryRun(); List expectedErrorRecords = Arrays.asList(DataError.builder() - .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .errorRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorRecord("{\"COL_STRING\":\"Andy\",\"COL_DATETIME\":\"2022-01-99 00:00:00.0\",\"COL_INT\":\"??\",\"COL_DECIMAL\":null}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name().toUpperCase(), 1L)) .build(), DataError.builder() - .errorCategory(ValidationCategory.CHECK_CONSTRAINT.getCategoryName()) - .errorRecord("2,,NaN,2022-01-12 00:00:00.0") + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorRecord("{\"COL_STRING\":null,\"COL_DATETIME\":\"2022-01-12 00:00:00.0\",\"COL_INT\":\"2\",\"COL_DECIMAL\":\"NaN\"}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name().toUpperCase(), 2L)) .build(), DataError.builder() - .errorCategory(ValidationCategory.CONVERSION.getCategoryName()) - .errorRecord("??,Andy,,2022-01-99 00:00:00.0") + .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorRecord("{\"COL_STRING\":\"Andy\",\"COL_DATETIME\":\"2022-01-99 00:00:00.0\",\"COL_INT\":\"??\",\"COL_DECIMAL\":null}") .errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col1.name().toUpperCase(), 1L)) .build()); @@ -1121,6 +1122,121 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); } + @Test + public void testBulkLoadDryRunFailureWithFileNotFound() + { + String filePath = "src/test/resources/data/bulk-load/input/non_existent_file.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2NonNullable, col3NonNullable, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + 
.database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER,\"col_string\" VARCHAR NOT NULL,\"col_decimal\" DECIMAL(5,2) NOT NULL,\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER,\"append_time\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' FROM CSVREAD('src/test/resources/data/bulk-load/input/non_existent_file.csv'," + + "'col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\"" + + "(\"col_int\" VARCHAR,\"col_string\" VARCHAR,\"col_decimal\" VARCHAR,\"col_datetime\" VARCHAR,\"legend_persistence_file\" VARCHAR,\"legend_persistence_row_number\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"legend_persistence_file\", \"legend_persistence_row_number\") " + + "SELECT CONVERT(\"col_int\",VARCHAR),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",VARCHAR),CONVERT(\"col_datetime\",VARCHAR)," + + "'src/test/resources/data/bulk-load/input/non_existent_file.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/non_existent_file.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + String expectedDryRunNullValidationSql = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (main_validation_lp_yosulf.\"col_string\" IS NULL) OR (main_validation_lp_yosulf.\"col_decimal\" IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT 
main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_decimal\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_decimal\" AS DECIMAL(5,2)) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.empty()); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + List expectedErrorRecords = Arrays.asList(DataError.builder() + .errorCategory(ErrorCategory.FILE_NOT_FOUND.name()) + .errorMessage("File 
not found in specified location") + .putAllErrorDetails(buildErrorDetails(filePath)) + .build()); + + Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); + Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); + } + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, int sampleRowCount) { return RelationalIngestor.builder() @@ -1234,4 +1350,10 @@ private Map buildErrorDetails(String fileName, String columnName return errorDetails; } + private Map buildErrorDetails(String fileName) + { + Map errorDetails = new HashMap<>(); + errorDetails.put(DataError.FILE_NAME, fileName); + return errorDetails; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml index 667c7bcd5d4..ee35d0335d3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml @@ -68,6 +68,14 @@ + + + org.apache.commons + commons-csv + ${commons-csv.version} + + + org.slf4j diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 604641ec90c..b8cd5bebbb5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -16,8 +16,11 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVRecord; import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; @@ -47,12 +50,15 @@ import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import 
org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.relational.executor.RelationalExecutor; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; import org.finos.legend.engine.persistence.components.relational.snowflake.optmizer.LowerCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.snowflake.optmizer.UpperCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.SnowflakeDataTypeMapping; @@ -86,6 +92,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.io.StringReader; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; @@ -101,12 +109,14 @@ import java.util.Queue; import java.util.Set; import java.util.ArrayList; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; -import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CHECK_CONSTRAINT; -import static org.finos.legend.engine.persistence.components.util.ValidationCategory.CONVERSION; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUE; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.TYPE_CONVERSION; public class SnowflakeSink extends AnsiSqlSink { @@ -126,14 +136,18 @@ public class SnowflakeSink extends AnsiSqlSink private static final String FIRST_ERROR = "first_error"; private static final String FIRST_ERROR_COLUMN_NAME = "first_error_column_name"; private static final String ERROR = "ERROR"; + protected static final String FILE_WITH_ERROR = "FILE"; + protected static final String ROW_NUMBER = "ROW_NUMBER"; private static final String LINE = "LINE"; private static final String CHARACTER = "CHARACTER"; private static final String BYTE_OFFSET = "BYTE_OFFSET"; private static final String CATEGORY = "CATEGORY"; private static final String COLUMN_NAME = "COLUMN_NAME"; private static final String ROW_START_LINE = "ROW_START_LINE"; - private static final String REJECTED_RECORD = "REJECTED_RECORD"; + private static final String FIELD_DELIMITER = "FIELD_DELIMITER"; + private static final String ESCAPE = "ESCAPE"; + private static final String FIELD_OPTIONALLY_ENCLOSED_BY = "FIELD_OPTIONALLY_ENCLOSED_BY"; static { @@ -261,19 +275,51 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } } - public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount) + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, 
CaseConversion caseConversion)
     {
-        if (dryRunValidationSqlPlan == null || dryRunValidationSqlPlan.isEmpty())
+        try
         {
-            return performDryRunWithValidationMode(executor, dryRunSqlPlan, sampleRowCount);
+            if (dryRunValidationSqlPlan == null || dryRunValidationSqlPlan.isEmpty())
+            {
+                return performDryRunWithValidationMode(datasets, executor, dryRunSqlPlan, sampleRowCount);
+            }
+            else
+            {
+                return performDryRunWithValidationQueries(datasets, executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount, caseConversion);
+            }
         }
-        else
+        catch (Exception e)
         {
-            return performDryRunWithValidationQueries(datasets, executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount);
+            return parseSnowflakeExceptions(e);
         }
     }
 
-    private List<DataError> performDryRunWithValidationMode(Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, int sampleRowCount)
+    private List<DataError> parseSnowflakeExceptions(Exception e)
+    {
+        String errorMessage = e.getMessage();
+
+        if (errorMessage.contains("Error parsing"))
+        {
+            return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.PARSING_ERROR.name()).errorMessage(errorMessage).build());
+        }
+
+        if (errorMessage.contains("file") && errorMessage.contains("was not found"))
+        {
+            Optional<String> fileName = Optional.empty();
+            Matcher matcher = Pattern.compile("'(.*)'").matcher(errorMessage);
+            if (matcher.find())
+            {
+                fileName = Optional.of(matcher.group(1));
+            }
+            Map<String, Object> errorDetails = buildErrorDetails(fileName, Optional.empty(), Optional.empty());
+
+            return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND.name()).errorMessage(errorMessage).putAllErrorDetails(errorDetails).build());
+        }
+
+        return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN.name()).errorMessage(errorMessage).build());
+    }
+
+    private List<DataError> performDryRunWithValidationMode(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, int sampleRowCount)
     {
         List<TabularData> results = executor.executePhysicalPlanAndGetResults(dryRunSqlPlan, sampleRowCount);
         List<DataError> dataErrors = new ArrayList<>();
@@ -288,10 +334,20 @@ private List<DataError> performDryRunWithValidationMode(Executor
                 errorDetails.put(DataError.CHARACTER_POSITION, characterPos));
 
                 DataError dataError = DataError.builder()
                     .errorMessage(getString(row, ERROR).orElseThrow(IllegalStateException::new))
-                    .errorCategory(getString(row, CATEGORY).orElseThrow(IllegalStateException::new))
+                    .errorCategory(parseSnowflakeErrorCategory(row))
                     .putAllErrorDetails(errorDetails)
-                    .errorRecord(getString(row, REJECTED_RECORD))
+                    .errorRecord(getString(row, REJECTED_RECORD).map(rejectedRecord ->
+                    {
+                        try
+                        {
+                            return parseSnowflakeRejectedRecord(datasets, rejectedRecord);
+                        }
+                        catch (IOException e)
+                        {
+                            throw new RuntimeException(e);
+                        }
+                    }))
                     .build();
                 dataErrors.add(dataError);
             }
@@ -299,7 +355,85 @@ private List<DataError> performDryRunWithValidationMode(Executor
-    private List<DataError> performDryRunWithValidationQueries(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount)
+    private String parseSnowflakeErrorCategory(Map<String, Object> row)
+    {
+        String snowflakeErrorCategory = getString(row, CATEGORY).orElseThrow(IllegalStateException::new);
+        String errorMessage = getString(row, ERROR).orElseThrow(IllegalStateException::new);
+
+        if (snowflakeErrorCategory.equals("conversion"))
+        {
+            return ErrorCategory.TYPE_CONVERSION.name();
+        }
+        else if (snowflakeErrorCategory.equals("check_constraint"))
+        {
+            if (errorMessage.contains("NULL result in a non-nullable column"))
+            {
+                return
ErrorCategory.CHECK_NULL_CONSTRAINT.name(); + } + else + { + return ErrorCategory.CHECK_OTHER_CONSTRAINT.name(); + } + } + else + { + return ErrorCategory.UNKNOWN.name(); // TODO: or shall we return snowflake's error category? + } + } + + public String parseSnowflakeRejectedRecord(Datasets datasets, String rejectedRecord) throws IOException + { + if (!(datasets.stagingDataset() instanceof StagedFilesDataset)) + { + throw new IllegalStateException(""); + } + StagedFilesDataset stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); + if (!(stagedFilesDataset.stagedFilesDatasetProperties() instanceof SnowflakeStagedFilesDatasetProperties)) + { + throw new IllegalStateException(""); + } + SnowflakeStagedFilesDatasetProperties snowflakeStagedFilesDatasetProperties = (SnowflakeStagedFilesDatasetProperties) stagedFilesDataset.stagedFilesDatasetProperties(); + if (!snowflakeStagedFilesDatasetProperties.fileFormat().isPresent() || !(snowflakeStagedFilesDatasetProperties.fileFormat().get() instanceof StandardFileFormat)) + { + throw new IllegalStateException(""); + } + StandardFileFormat standardFileFormat = (StandardFileFormat) snowflakeStagedFilesDatasetProperties.fileFormat().get(); + if (!standardFileFormat.formatType().equals(FileFormatType.CSV)) + { + throw new IllegalStateException(""); + } + + CSVFormat csvFormat = CSVFormat.DEFAULT.withDelimiter(',').withQuote(null).withEscape(null); + Map formatOptions = standardFileFormat.formatOptions(); + if (formatOptions.containsKey(FIELD_DELIMITER)) + { + csvFormat = csvFormat.withDelimiter(getChar(formatOptions, FIELD_DELIMITER).orElseThrow(IllegalStateException::new)); + } + if (formatOptions.containsKey(ESCAPE)) + { + csvFormat = csvFormat.withEscape(getChar(formatOptions, ESCAPE).orElseThrow(IllegalStateException::new)); + } + if (formatOptions.containsKey(FIELD_OPTIONALLY_ENCLOSED_BY)) + { + csvFormat = csvFormat.withQuote(getChar(formatOptions, FIELD_OPTIONALLY_ENCLOSED_BY).orElseThrow(IllegalStateException::new)); + } + + List allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); + Map errorRecordMap = new HashMap<>(); + + List records = csvFormat.parse(new StringReader(rejectedRecord)).getRecords(); + for (CSVRecord csvRecord : records) + { + for (int i = 0; i < csvRecord.size(); i++) + { + errorRecordMap.put(allFields.get(i), csvRecord.get(i)); + } + } + + return new ObjectMapper().writeValueAsString(errorRecordMap); + } + + private List performDryRunWithValidationQueries(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) { executor.executePhysicalPlan(dryRunSqlPlan); @@ -312,11 +446,11 @@ private List performDryRunWithValidationQueries(Datasets datasets, Ex List allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); - List, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(CHECK_CONSTRAINT, new ArrayList<>()); - List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(CONVERSION, new ArrayList<>()); + List, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUE, new ArrayList<>()); + List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(TYPE_CONVERSION, new ArrayList<>()); // Execute queries for null values - int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, 
dataErrorsByCategory, allFields);
+        int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion);
         dataErrorsTotalCount += nullValuesErrorsCount;
 
         // Execute queries for datatype conversion
@@ -331,8 +465,8 @@ private List<DataError> performDryRunWithValidationQueries(Datasets datasets, Ex
                 // This loop will only be executed once as there is always only one element in the set
                 for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet()))
                 {
-                    DataError dataError = constructDataError(allFields, row, FILE_WITH_ERROR, ROW_NUMBER, CONVERSION, column);
-                    dataErrorsByCategory.get(CONVERSION).add(dataError);
+                    DataError dataError = constructDataError(allFields, row, TYPE_CONVERSION, column, caseConversion);
+                    dataErrorsByCategory.get(TYPE_CONVERSION).add(dataError);
                     dataErrorsTotalCount++;
                 }
             }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java
index 4540726ea49..5e64bb0f7f7 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java
@@ -14,8 +14,11 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor;
 
+import org.finos.legend.engine.persistence.components.common.FileFormatType;
 import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField;
 import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode;
+import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties;
+import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat;
 import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.MetadataRowNumberValue;
 import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor;
 import org.finos.legend.engine.persistence.components.transformer.VisitorContext;
@@ -25,7 +28,19 @@ public class MetadataRowNumberFieldVisitor implements LogicalPlanVisitor<MetadataRowNumberField>

Date: Fri, 8 Mar 2024 18:56:32 +0530
Subject: [PATCH 28/32] Handle Data Errors in Duplicates and Data version error cases

---
 .../DeriveDataErrorRowsLogicalPlan.java | 7 +-
 .../relational/ansi/AnsiSqlSink.java | 51 ++--------
 .../util/DataErrorFairDistributionTest.java | 2 +-
 .../components/relational/api/ApiUtils.java | 67 +++++++++++++
 .../relational/api/DataErrorAbstract.java | 2 +-
 .../relational/api/ErrorCategory.java | 4 +-
 .../api/RelationalIngestorAbstract.java | 15 ++-
 .../exception/DataQualityException.java | 13 +--
.../components/relational/h2/H2Sink.java | 7 +- .../ingestmode/bulkload/BulkLoadTest.java | 18 ++-- .../versioning/TestDedupAndVersioning.java | 96 ++++++++++--------- .../relational/snowflake/SnowflakeSink.java | 89 +++++++++-------- 12 files changed, 212 insertions(+), 159 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java index 836620188e0..44b156d092a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java @@ -30,6 +30,8 @@ public class DeriveDataErrorRowsLogicalPlan implements VersioningStrategyVisitor private Dataset tempStagingDataset; private int sampleRowCount; + public static final String DATA_VERSION_ERROR_COUNT = "legend_persistence_error_count"; + public DeriveDataErrorRowsLogicalPlan(List primaryKeys, List remainingColumns, Dataset tempStagingDataset, int sampleRowCount) { this.primaryKeys = primaryKeys; @@ -72,7 +74,6 @@ public LogicalPlan visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersi private LogicalPlan getLogicalPlanForDataErrors(String versionField) { - String distinctRowCount = "legend_persistence_error_count"; List pKsAndVersion = new ArrayList<>(); for (String pk: primaryKeys) { @@ -89,7 +90,7 @@ private LogicalPlan getLogicalPlanForDataErrors(String versionField) FunctionImpl countDistinct = FunctionImpl.builder() .functionName(FunctionName.COUNT) .addValue(FunctionImpl.builder().functionName(FunctionName.DISTINCT).addAllValue(distinctValueFields).build()) - .alias(distinctRowCount) + .alias(DATA_VERSION_ERROR_COUNT) .build(); Selection selectDataError = Selection.builder() @@ -97,7 +98,7 @@ private LogicalPlan getLogicalPlanForDataErrors(String versionField) .groupByFields(pKsAndVersion) .addAllFields(pKsAndVersion) .addFields(countDistinct) - .havingCondition(GreaterThan.of(FieldValue.builder().fieldName(distinctRowCount).build(), ObjectValue.of(1))) + .havingCondition(GreaterThan.of(FieldValue.builder().fieldName(DATA_VERSION_ERROR_COUNT).build(), ObjectValue.of(1))) .limit(sampleRowCount) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 2c9ba6c5818..d188c0f1871 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -150,6 +150,7 @@ import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.TruncateVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.WindowFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ApiUtils; import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; @@ -165,6 +166,7 @@ import java.util.*; import java.util.stream.Collectors; +import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.buildErrorRecord; import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUE; public class AnsiSqlSink extends RelationalSink @@ -402,28 +404,13 @@ protected int findNullValuesDataErrors(Executor ex protected DataError constructDataError(List allColumns, Map row, ValidationCategory validationCategory, String validatedColumnName, CaseConversion caseConversion) { ErrorCategory errorCategory = getValidationFailedErrorCategory(validationCategory); - - String fileColumnName; - String rowNumberColumnName; - switch (caseConversion) - { - case TO_UPPER: - fileColumnName = FILE.toUpperCase(); - rowNumberColumnName = ROW_NUMBER.toUpperCase(); - break; - case TO_LOWER: - fileColumnName = FILE.toLowerCase(); - rowNumberColumnName = ROW_NUMBER.toLowerCase(); - break; - default: - fileColumnName = FILE; - rowNumberColumnName = ROW_NUMBER; - } + String fileColumnName = ApiUtils.convertCase(caseConversion, FILE); + String rowNumberColumnName = ApiUtils.convertCase(caseConversion, ROW_NUMBER); Map errorDetails = buildErrorDetails(getString(row, fileColumnName), Optional.of(validatedColumnName), getLong(row, rowNumberColumnName)); return DataError.builder() .errorMessage(errorCategory.getDefaultErrorMessage()) - .errorCategory(errorCategory.name()) + .errorCategory(errorCategory) .putAllErrorDetails(errorDetails) .errorRecord(buildErrorRecord(allColumns, row)) .build(); @@ -438,29 +425,6 @@ protected Map buildErrorDetails(Optional fileName, Optio return errorDetails; } - protected String buildErrorRecord(List allColumns, Map row) - { - Map errorRecordMap = new HashMap<>(); - - for (String column : allColumns) - { - if (row.containsKey(column)) - { - errorRecordMap.put(column, row.get(column)); - } - } - - ObjectMapper objectMapper = new ObjectMapper(); - try - { - return objectMapper.writeValueAsString(errorRecordMap); - } - catch (JsonProcessingException e) - { - throw new RuntimeException(e); - } - } - private ErrorCategory getValidationFailedErrorCategory(ValidationCategory validationCategory) { switch (validationCategory) @@ -490,10 +454,7 @@ public List getDataErrorsWithFairDistributionAcrossCategories(int sam { if (!dataErrorsByCategory.get(validationCategory).isEmpty()) { - if (fairlyDistributedDataErrors.size() < sampleRowCount) - { - 
fairlyDistributedDataErrors.add(dataErrorsByCategory.get(validationCategory).poll()); - } + fairlyDistributedDataErrors.add(dataErrorsByCategory.get(validationCategory).poll()); } else { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java index 31e1fa04d13..0dbfef97589 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -113,7 +113,7 @@ private DataError getDummyDataError(ErrorCategory category, long rowNumber) errorDetails.put(DataError.COLUMN_NAME, "some_column_name"); return DataError.builder() - .errorCategory(category.name()) + .errorCategory(category) .putAllErrorDetails(errorDetails) .errorRecord("some_data") .errorMessage("some_error_message") diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java index 9288024b09d..b00c67bd67a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java @@ -31,6 +31,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsCaseConverter; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.planner.Planner; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -41,6 +42,7 @@ import org.finos.legend.engine.persistence.components.util.MetadataDataset; import java.util.*; +import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD; @@ -231,4 +233,69 @@ public static Optional getFirstColumnValue(Map row) } return object; } + + public static List constructDataQualityErrors(Dataset stagingDataset, List> dataErrors, 
+ ErrorCategory errorCategory, CaseConversion caseConversion, String errorField, String errorDetailsKey) + { + List dataErrorList = new ArrayList<>(); + List allFields = stagingDataset.schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); + String caseCorrectedErrorField = convertCase(caseConversion, errorField); + + for (Map dataError: dataErrors) + { + dataErrorList.add(DataError.builder() + .errorMessage(errorCategory.getDefaultErrorMessage()) + .errorCategory(errorCategory) + .errorRecord(buildErrorRecord(allFields, dataError)) + .putAllErrorDetails(buildErrorDetails(dataError, caseCorrectedErrorField, errorDetailsKey)) + .build()); + } + return dataErrorList; + } + + private static Map buildErrorDetails(Map dataError, String errorField, String errorDetailsKey) + { + Map errorDetails = new HashMap<>(); + Object errorDetailsValue = dataError.get(errorField); + errorDetails.put(errorDetailsKey, errorDetailsValue); + return errorDetails; + } + + + public static String convertCase(CaseConversion caseConversion, String value) + { + switch (caseConversion) + { + case TO_UPPER: + return value.toUpperCase(); + case TO_LOWER: + return value.toLowerCase(); + default: + return value; + } + } + + public static String buildErrorRecord(List allColumns, Map row) + { + Map errorRecordMap = new HashMap<>(); + + for (String column : allColumns) + { + if (row.containsKey(column)) + { + errorRecordMap.put(column, row.get(column)); + } + } + + ObjectMapper objectMapper = new ObjectMapper(); + try + { + return objectMapper.writeValueAsString(errorRecordMap); + } + catch (JsonProcessingException e) + { + throw new RuntimeException(e); + } + } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java index dd151897075..69620c2f6ae 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java @@ -40,7 +40,7 @@ public interface DataErrorAbstract String errorMessage(); - String errorCategory(); + ErrorCategory errorCategory(); Optional errorRecord(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java index 175a28f612c..fc7adfe7feb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java @@ -21,7 +21,9 @@ public enum ErrorCategory CHECK_OTHER_CONSTRAINT("Table constraints not fulfilled"), PARSING_ERROR("Unable to parse file"), FILE_NOT_FOUND("File not found in specified location"), - UNKNOWN("Unknown error"); + UNKNOWN("Unknown error"), + DUPLICATES("Duplicate rows found"), + DATA_VERSION_ERROR("Data errors (same PK, same version but different data)"); private final String defaultErrorMessage; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 1e7ee1e1b7c..3d3ec68362b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -23,6 +23,8 @@ import org.finos.legend.engine.persistence.components.importer.Importer; import org.finos.legend.engine.persistence.components.importer.Importers; import org.finos.legend.engine.persistence.components.ingestmode.*; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicationHandler; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DeriveDataErrorRowsLogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; @@ -43,7 +45,6 @@ import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataUtils; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; -import org.finos.legend.engine.persistence.components.util.PlaceholderValue; import org.finos.legend.engine.persistence.components.util.TableNameGenUtils; import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; import org.finos.legend.engine.persistence.components.util.SqlLogging; @@ -63,6 +64,8 @@ import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.TABLE_IS_NON_EMPTY; import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.*; import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.retrieveValueAsLong; +import static org.finos.legend.engine.persistence.components.relational.api.DataErrorAbstract.NUM_DATA_VERSION_ERRORS; +import static org.finos.legend.engine.persistence.components.relational.api.DataErrorAbstract.NUM_DUPLICATES; import static 
org.finos.legend.engine.persistence.components.relational.api.RelationalGeneratorAbstract.BULK_LOAD_BATCH_STATUS_PATTERN; import static org.finos.legend.engine.persistence.components.transformer.Transformer.TransformOptionsAbstract.DATE_TIME_FORMATTER; @@ -461,7 +464,9 @@ public void dedupAndVersion() TabularData duplicateRows = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(DUPLICATE_ROWS)).get(0); String errorMessage = "Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy"; LOGGER.error(errorMessage); - throw new DataQualityException(errorMessage, duplicateRows.getData()); + List dataErrors = ApiUtils.constructDataQualityErrors(enrichedDatasets.stagingDataset(), duplicateRows.getData(), + ErrorCategory.DUPLICATES, caseConversion(), DatasetDeduplicationHandler.COUNT, NUM_DUPLICATES); + throw new DataQualityException(errorMessage, dataErrors); } } @@ -474,10 +479,12 @@ public void dedupAndVersion() if (maxDataErrorsValue.isPresent() && maxDataErrorsValue.get() > 1) { // Find the data errors - TabularData dataErrors = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(DATA_ERROR_ROWS)).get(0); + TabularData errors = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(DATA_ERROR_ROWS)).get(0); String errorMessage = "Encountered Data errors (same PK, same version but different data), hence failing the batch"; LOGGER.error(errorMessage); - throw new DataQualityException(errorMessage, dataErrors.getData()); + List dataErrors = ApiUtils.constructDataQualityErrors(enrichedDatasets.stagingDataset(), errors.getData(), + ErrorCategory.DATA_VERSION_ERROR, caseConversion(), DeriveDataErrorRowsLogicalPlan.DATA_VERSION_ERROR_COUNT, NUM_DATA_VERSION_ERRORS); + throw new DataQualityException(errorMessage, dataErrors); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java index 84f0083f7dd..eb8c44cf446 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java @@ -14,21 +14,22 @@ package org.finos.legend.engine.persistence.components.relational.exception; +import org.finos.legend.engine.persistence.components.relational.api.DataError; + import java.util.List; -import java.util.Map; public class DataQualityException extends RuntimeException { - private List> sampleRows; + private List dataErrors; - public List> getSampleRows() + public List getDataErrors() { - return sampleRows; + return dataErrors; } - public DataQualityException(String message, List> sampleRows) + public DataQualityException(String message, List dataErrors) { super(message); - this.sampleRows = sampleRows; + this.dataErrors = dataErrors; } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index d205e329cf0..7ce873f7d90 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -281,10 +281,10 @@ private List parseH2Exceptions(Exception e) { String fileName = extractProblematicValueFromErrorMessage(errorMessage); Map errorDetails = buildErrorDetails(Optional.of(fileName), Optional.empty(), Optional.empty()); - return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND.name()).errorMessage(ErrorCategory.FILE_NOT_FOUND.getDefaultErrorMessage()).putAllErrorDetails(errorDetails).build()); + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND).errorMessage(ErrorCategory.FILE_NOT_FOUND.getDefaultErrorMessage()).putAllErrorDetails(errorDetails).build()); } - return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN.name()).errorMessage(errorMessage).build()); + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessage).build()); } public List performDryRunWithValidationQueries(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) @@ -304,8 +304,7 @@ public List performDryRunWithValidationQueries(Datasets datasets, Tra List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(TYPE_CONVERSION, new ArrayList<>()); // Execute queries for null values - int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion); - dataErrorsTotalCount += nullValuesErrorsCount; + dataErrorsTotalCount += findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion); // Execute queries for datatype conversion for (Pair, SqlPlan> pair : queriesForDatatype) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index f9698d631cf..cf42fe2b8cb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -965,27 +965,27 @@ public void testBulkLoadDryRunFailure() DryRunResult dryRunResult = ingestor.dryRun(); List expectedErrorRecords = Arrays.asList(DataError.builder() - .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name(), 1L)) .build(), DataError.builder() - .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) .errorRecord("{\"col_int\":\"2\",\"col_decimal\":\"NaN\",\"col_string\":null,\"col_datetime\":\"2022-01-12 00:00:00.0\"}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name(), 2L)) .build(), DataError.builder() - .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorCategory(ErrorCategory.TYPE_CONVERSION) .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") .errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col1.name(), 1L)) .build(), DataError.builder() - .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorCategory(ErrorCategory.TYPE_CONVERSION) .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") .errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col4.name(), 1L)) .build(), DataError.builder() - .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorCategory(ErrorCategory.TYPE_CONVERSION) .errorRecord("{\"col_int\":\"2\",\"col_decimal\":\"NaN\",\"col_string\":null,\"col_datetime\":\"2022-01-12 00:00:00.0\"}") .errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col3.name(), 2L)) @@ -1102,17 +1102,17 @@ public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() DryRunResult dryRunResult = ingestor.dryRun(); List expectedErrorRecords = Arrays.asList(DataError.builder() - .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) .errorRecord("{\"COL_STRING\":\"Andy\",\"COL_DATETIME\":\"2022-01-99 00:00:00.0\",\"COL_INT\":\"??\",\"COL_DECIMAL\":null}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name().toUpperCase(), 1L)) .build(), DataError.builder() - .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT.name()) + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) .errorRecord("{\"COL_STRING\":null,\"COL_DATETIME\":\"2022-01-12 00:00:00.0\",\"COL_INT\":\"2\",\"COL_DECIMAL\":\"NaN\"}") .errorMessage("Null values found in non-nullable column") .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name().toUpperCase(), 2L)) .build(), DataError.builder() - .errorCategory(ErrorCategory.TYPE_CONVERSION.name()) + .errorCategory(ErrorCategory.TYPE_CONVERSION) .errorRecord("{\"COL_STRING\":\"Andy\",\"COL_DATETIME\":\"2022-01-99 00:00:00.0\",\"COL_INT\":\"??\",\"COL_DECIMAL\":null}") 
.errorMessage("Unable to type cast column") .putAllErrorDetails(buildErrorDetails(filePath, col1.name().toUpperCase(), 1L)) @@ -1228,7 +1228,7 @@ public void testBulkLoadDryRunFailureWithFileNotFound() DryRunResult dryRunResult = ingestor.dryRun(); List expectedErrorRecords = Arrays.asList(DataError.builder() - .errorCategory(ErrorCategory.FILE_NOT_FOUND.name()) + .errorCategory(ErrorCategory.FILE_NOT_FOUND) .errorMessage("File not found in specified location") .putAllErrorDetails(buildErrorDetails(filePath)) .build()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java index ce96f7ab760..26859a93fbb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.versioning; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; @@ -32,6 +34,8 @@ import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; import org.finos.legend.engine.persistence.components.relational.exception.DataQualityException; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; @@ -41,10 +45,7 @@ import org.junit.jupiter.api.Test; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import static org.finos.legend.engine.persistence.components.TestUtils.*; import static org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract.DATA_SPLIT; @@ -178,15 +179,14 @@ void testNoDedupMaxVersioning() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row1 = new HashMap<>(); row1.put("name", "Cathy"); row1.put("id", 3); row1.put("version", 1); - row1.put("legend_persistence_error_count", 2); - expectedSampleRows.add(row1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), 
hence failing the batch", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -242,15 +242,14 @@ void testNoDedupAllVersion() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row1 = new HashMap<>(); row1.put("name", "Cathy"); row1.put("id", 3); row1.put("version", 1); - row1.put("legend_persistence_error_count", 2); - expectedSampleRows.add(row1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -329,15 +328,14 @@ void testFilterDupsMaxVersion() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row1 = new HashMap<>(); row1.put("name", "Cathy"); row1.put("id", 3); row1.put("version", 1); - row1.put("legend_persistence_error_count", 2); - expectedSampleRows.add(row1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -400,15 +398,14 @@ void testFilterDupsAllVersion() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row1 = new HashMap<>(); row1.put("name", "Cathy"); row1.put("id", 3); row1.put("version", 1); - row1.put("legend_persistence_error_count", 2); - expectedSampleRows.add(row1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -434,19 +431,18 @@ void testFailOnDupsNoVersioning() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row1 = new HashMap<>(); row1.put("name", "Andy"); row1.put("id", 1); - row1.put("legend_persistence_count", 3); + Map row2 = new HashMap<>(); row2.put("name", "Becky"); row2.put("id", 2); - row2.put("legend_persistence_count", 2); - expectedSampleRows.add(row1); - expectedSampleRows.add(row2); + + DataError dataError1 = buildDataError(ErrorCategory.DUPLICATES, row1, buildErrorDetailsMap("num_duplicates", 3)); + DataError dataError2 = buildDataError(ErrorCategory.DUPLICATES, row2, buildErrorDetailsMap("num_duplicates", 2)); Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError1, dataError2), e.getDataErrors()); } } @@ -484,14 +480,13 @@ void testFailOnDupsMaxVersionDoNotPerform() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new 
ArrayList<>(); Map row = new HashMap<>(); row.put("name", "Becky"); row.put("id", 2); - row.put("legend_persistence_count", 2); - expectedSampleRows.add(row); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, buildErrorDetailsMap("num_duplicates", 2)); Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -529,14 +524,13 @@ void testFailOnDupsMaxVersion() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row = new HashMap<>(); row.put("name", "Becky"); row.put("id", 2); - row.put("legend_persistence_count", 2); - expectedSampleRows.add(row); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, buildErrorDetailsMap("num_duplicates", 2)); Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -577,14 +571,13 @@ void testFailOnDupsAllVersionDoNotPerform() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row = new HashMap<>(); row.put("name", "Becky"); row.put("id", 2); - row.put("legend_persistence_count", 2); - expectedSampleRows.add(row); - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, buildErrorDetailsMap("num_duplicates", 2)); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -624,14 +617,13 @@ void testFailOnDupsAllVersion() throws Exception } catch (DataQualityException e) { - List> expectedSampleRows = new ArrayList<>(); Map row = new HashMap<>(); row.put("name", "Becky"); row.put("id", 2); - row.put("legend_persistence_count", 2); - expectedSampleRows.add(row); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, buildErrorDetailsMap("num_duplicates", 2)); Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); - TestUtils.assertEquals(expectedSampleRows, e.getSampleRows()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -741,4 +733,22 @@ private String getTempStagingTableName(String ingestRunId) { return TableNameGenUtils.generateTableName(stagingTableName, TEMP_STAGING_DATASET_QUALIFIER, ingestRunId); } + + private Map buildErrorDetailsMap(String key, Object value) + { + Map errorDetailsMap = new HashMap<>(); + errorDetailsMap.put(key, value); + return errorDetailsMap; + } + + private DataError buildDataError(ErrorCategory errorCategory, Map row, Map errorDetailsMap) throws JsonProcessingException + { + DataError dataError = DataError.builder() + .errorMessage(errorCategory.getDefaultErrorMessage()) + .errorCategory(errorCategory) + .errorRecord(new ObjectMapper().writeValueAsString(row)) + .putAllErrorDetails(errorDetailsMap) + .build(); + return 
dataError; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index b8cd5bebbb5..dadcd71e8ca 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -140,14 +140,14 @@ public class SnowflakeSink extends AnsiSqlSink protected static final String ROW_NUMBER = "ROW_NUMBER"; private static final String LINE = "LINE"; private static final String CHARACTER = "CHARACTER"; - private static final String BYTE_OFFSET = "BYTE_OFFSET"; private static final String CATEGORY = "CATEGORY"; private static final String COLUMN_NAME = "COLUMN_NAME"; - private static final String ROW_START_LINE = "ROW_START_LINE"; private static final String REJECTED_RECORD = "REJECTED_RECORD"; private static final String FIELD_DELIMITER = "FIELD_DELIMITER"; private static final String ESCAPE = "ESCAPE"; private static final String FIELD_OPTIONALLY_ENCLOSED_BY = "FIELD_OPTIONALLY_ENCLOSED_BY"; + private static final String CATEGORY_CONVERSION = "conversion"; + private static final String CATEGORY_CHECK_CONSTRAINT = "check_constraint"; static { @@ -275,7 +275,9 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } } - public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, + SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, + int sampleRowCount, CaseConversion caseConversion) { try { @@ -300,23 +302,23 @@ private List parseSnowflakeExceptions(Exception e) if (errorMessage.contains("Error parsing")) { - return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.PARSING_ERROR.name()).errorMessage(errorMessage).build()); + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.PARSING_ERROR).errorMessage(errorMessage).build()); } if (errorMessage.contains("file") && errorMessage.contains("was not found")) { Optional fileName = Optional.empty(); - Matcher matcher = Pattern.compile("'(.*)'").matcher(errorMessage); + Matcher matcher = Pattern.compile("file '(.*)' was not found").matcher(errorMessage); if (matcher.find()) { fileName = Optional.of(matcher.group(1)); } Map errorDetails = buildErrorDetails(fileName, Optional.empty(), Optional.empty()); - return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND.name()).errorMessage(errorMessage).putAllErrorDetails(errorDetails).build()); + return 
Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND).errorMessage(errorMessage).putAllErrorDetails(errorDetails).build()); } - return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN.name()).errorMessage(errorMessage).build()); + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessage).build()); } private List performDryRunWithValidationMode(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, int sampleRowCount) @@ -334,8 +336,8 @@ private List performDryRunWithValidationMode(Datasets datasets, Execu getLong(row, CHARACTER).ifPresent(characterPos -> errorDetails.put(DataError.CHARACTER_POSITION, characterPos)); DataError dataError = DataError.builder() - .errorMessage(parseSnowflakeErrorCategory(row)) - .errorCategory(getString(row, CATEGORY).orElseThrow(IllegalStateException::new)) + .errorMessage(getString(row, ERROR).orElseThrow(IllegalStateException::new)) + .errorCategory(parseSnowflakeErrorCategory(row)) .putAllErrorDetails(errorDetails) .errorRecord(getString(row, REJECTED_RECORD).map(rejectedRecord -> { @@ -345,7 +347,8 @@ private List performDryRunWithValidationMode(Datasets datasets, Execu } catch (IOException e) { - throw new RuntimeException(e); + LOGGER.warn("Exception in parsing the record"); + return String.format("{\"%s\" : \"%s\"}", "unparsed_row", rejectedRecord); } })) .build(); @@ -355,56 +358,36 @@ private List performDryRunWithValidationMode(Datasets datasets, Execu return dataErrors; } - private String parseSnowflakeErrorCategory(Map row) + private ErrorCategory parseSnowflakeErrorCategory(Map row) { String snowflakeErrorCategory = getString(row, CATEGORY).orElseThrow(IllegalStateException::new); String errorMessage = getString(row, ERROR).orElseThrow(IllegalStateException::new); - if (snowflakeErrorCategory.equals("conversion")) + if (snowflakeErrorCategory.equals(CATEGORY_CONVERSION)) { - return ErrorCategory.TYPE_CONVERSION.name(); + return ErrorCategory.TYPE_CONVERSION; } - else if (snowflakeErrorCategory.equals("check_constraint")) + else if (snowflakeErrorCategory.equals(CATEGORY_CHECK_CONSTRAINT)) { if (errorMessage.contains("NULL result in a non-nullable column")) { - return ErrorCategory.CHECK_NULL_CONSTRAINT.name(); + return ErrorCategory.CHECK_NULL_CONSTRAINT; } else { - return ErrorCategory.CHECK_OTHER_CONSTRAINT.name(); + return ErrorCategory.CHECK_OTHER_CONSTRAINT; } } else { - return ErrorCategory.UNKNOWN.name(); // TODO: or shall we return snowflake's error category? 
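// parseSnowflakeErrorCategory maps Snowflake's validation-mode category strings onto
// the library's ErrorCategory enum: CATEGORY_CONVERSION ("conversion") maps to
// TYPE_CONVERSION; CATEGORY_CHECK_CONSTRAINT ("check_constraint") maps to
// CHECK_NULL_CONSTRAINT when the error message reports a NULL result in a non-nullable
// column, and to CHECK_OTHER_CONSTRAINT otherwise; any unrecognised category falls
// back to UNKNOWN, now returned as the enum constant rather than its name() string,
// in line with the DataErrorAbstract.errorCategory() type change earlier in this series.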
+ return ErrorCategory.UNKNOWN; } } public String parseSnowflakeRejectedRecord(Datasets datasets, String rejectedRecord) throws IOException { - if (!(datasets.stagingDataset() instanceof StagedFilesDataset)) - { - throw new IllegalStateException(""); - } - StagedFilesDataset stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); - if (!(stagedFilesDataset.stagedFilesDatasetProperties() instanceof SnowflakeStagedFilesDatasetProperties)) - { - throw new IllegalStateException(""); - } - SnowflakeStagedFilesDatasetProperties snowflakeStagedFilesDatasetProperties = (SnowflakeStagedFilesDatasetProperties) stagedFilesDataset.stagedFilesDatasetProperties(); - if (!snowflakeStagedFilesDatasetProperties.fileFormat().isPresent() || !(snowflakeStagedFilesDatasetProperties.fileFormat().get() instanceof StandardFileFormat)) - { - throw new IllegalStateException(""); - } - StandardFileFormat standardFileFormat = (StandardFileFormat) snowflakeStagedFilesDatasetProperties.fileFormat().get(); - if (!standardFileFormat.formatType().equals(FileFormatType.CSV)) - { - throw new IllegalStateException(""); - } - - CSVFormat csvFormat = CSVFormat.DEFAULT.withDelimiter(',').withQuote(null).withEscape(null); - Map formatOptions = standardFileFormat.formatOptions(); + Map formatOptions = getFormatOptions(datasets); + CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote(null).withEscape(null); if (formatOptions.containsKey(FIELD_DELIMITER)) { csvFormat = csvFormat.withDelimiter(getChar(formatOptions, FIELD_DELIMITER).orElseThrow(IllegalStateException::new)); @@ -433,6 +416,30 @@ public String parseSnowflakeRejectedRecord(Datasets datasets, String rejectedRec return new ObjectMapper().writeValueAsString(errorRecordMap); } + private Map getFormatOptions(Datasets datasets) + { + if (!(datasets.stagingDataset() instanceof StagedFilesDataset)) + { + throw new IllegalStateException("StagedFilesDataset expected"); + } + StagedFilesDataset stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); + if (!(stagedFilesDataset.stagedFilesDatasetProperties() instanceof SnowflakeStagedFilesDatasetProperties)) + { + throw new IllegalStateException("SnowflakeStagedFilesDatasetProperties expected"); + } + SnowflakeStagedFilesDatasetProperties snowflakeStagedFilesDatasetProperties = (SnowflakeStagedFilesDatasetProperties) stagedFilesDataset.stagedFilesDatasetProperties(); + if (!snowflakeStagedFilesDatasetProperties.fileFormat().isPresent() || !(snowflakeStagedFilesDatasetProperties.fileFormat().get() instanceof StandardFileFormat)) + { + throw new IllegalStateException("StandardFileFormat expected"); + } + StandardFileFormat standardFileFormat = (StandardFileFormat) snowflakeStagedFilesDatasetProperties.fileFormat().get(); + if (!standardFileFormat.formatType().equals(FileFormatType.CSV)) + { + throw new IllegalStateException("CSV format expected"); + } + return standardFileFormat.formatOptions(); + } + private List performDryRunWithValidationQueries(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) { executor.executePhysicalPlan(dryRunSqlPlan); @@ -450,9 +457,7 @@ private List performDryRunWithValidationQueries(Datasets datasets, Ex List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(TYPE_CONVERSION, new ArrayList<>()); // Execute queries for null values - int nullValuesErrorsCount = findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion); - 
dataErrorsTotalCount += nullValuesErrorsCount; - + dataErrorsTotalCount += findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion); // Execute queries for datatype conversion for (Pair, SqlPlan> pair : queriesForDatatype) { From 342ccfcee3a1d2d2d98be54ebcc99b25fdd7a415 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 11 Mar 2024 11:58:04 +0800 Subject: [PATCH 29/32] Fix tests --- .../components/relational/ansi/AnsiSqlSink.java | 5 ++++- .../util/DataErrorFairDistributionTest.java | 12 ++++++------ .../components/relational/api/ErrorCategory.java | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index d188c0f1871..f6858734490 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -454,7 +454,10 @@ public List getDataErrorsWithFairDistributionAcrossCategories(int sam { if (!dataErrorsByCategory.get(validationCategory).isEmpty()) { - fairlyDistributedDataErrors.add(dataErrorsByCategory.get(validationCategory).poll()); + if (fairlyDistributedDataErrors.size() < sampleRowCount) + { + fairlyDistributedDataErrors.add(dataErrorsByCategory.get(validationCategory).poll()); + } } else { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java index 0dbfef97589..c8efa1ff460 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -46,8 +46,8 @@ public void testTotalErrorsSmallerThanSampleRowCount() List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 10, dataErrorsByCategory); Assertions.assertEquals(10, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT.name())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION.name())).collect(Collectors.toList())); 
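// The updated assertions compare ErrorCategory enum constants directly instead of
// their name() strings, consistent with DataErrorAbstract.errorCategory() now
// returning ErrorCategory rather than String.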
+ Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT)).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION)).collect(Collectors.toList())); } @Test @@ -66,8 +66,8 @@ public void testExhaustingOneCategory() List results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 55, dataErrorsByCategory); Assertions.assertEquals(20, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT.name())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT)).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION)).collect(Collectors.toList())); } @Test @@ -86,8 +86,8 @@ public void testExhaustingBothCategories() List results = sink.getDataErrorsWithFairDistributionAcrossCategories(19, 35, dataErrorsByCategory); Assertions.assertEquals(19, results.size()); - Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT.name())).collect(Collectors.toList())); - Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION.name())).collect(Collectors.toList())); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT)).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION)).collect(Collectors.toList())); } private void populateDataErrors(ValidationCategory validationCategory, ErrorCategory errorCategory, int totalCount, int expectedCount, Map> dataErrorsByCategory, List expectedList) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java index fc7adfe7feb..9c12bf39d9a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java @@ -17,7 +17,7 @@ public enum ErrorCategory { TYPE_CONVERSION("Unable to type cast column"), - CHECK_NULL_CONSTRAINT("Null values found in non-nullable column"), // TODO: shall we 
change to exactly the same as snowflake's error message for null? + CHECK_NULL_CONSTRAINT("Null values found in non-nullable column"), CHECK_OTHER_CONSTRAINT("Table constraints not fulfilled"), PARSING_ERROR("Unable to parse file"), FILE_NOT_FOUND("File not found in specified location"), From 78ed3a930bac905b4b281873db8e1ee8233a3173 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 11 Mar 2024 16:45:59 +0530 Subject: [PATCH 30/32] Provided an interface for init Datasets to pass the ingestRunId --- .../api/RelationalIngestorAbstract.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 3d3ec68362b..edf90ec390e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -245,7 +245,16 @@ public void initExecutor(Executor executor) */ public Datasets initDatasets(Datasets datasets) { - return enrichDatasetsAndGenerateOperations(datasets); + String ingestRunId = UUID.randomUUID().toString(); + return enrichDatasetsAndGenerateOperations(datasets, ingestRunId); + } + + /* + - Initializes Datasets with a provided ingestRunId + */ + public Datasets initDatasets(Datasets datasets, String ingestRunId) + { + return enrichDatasetsAndGenerateOperations(datasets, ingestRunId); } /* @@ -607,7 +616,7 @@ private List performFullIngestion(RelationalConnection connectio } - private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) + private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets, String ingestRunId) { LOGGER.info("Initializing Datasets"); // Validation: init(Connection) must have been invoked @@ -615,8 +624,9 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) { throw new IllegalStateException("Executor not initialized, call init(Connection) before invoking this method!"); } + // 0. Set the run id - ingestRunId = UUID.randomUUID().toString(); + this.ingestRunId = ingestRunId; // 1. 
Case handling enrichedIngestMode = ApiUtils.applyCase(ingestMode(), caseConversion()); From 40c6df61acbc5c53dd58e2d1530f1793a1b24bd9 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 11 Mar 2024 19:57:11 +0530 Subject: [PATCH 31/32] Clean up the ingest run id generation --- .../api/RelationalIngestorAbstract.java | 32 ++++++------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index edf90ec390e..5b4a8e678ee 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -176,6 +176,12 @@ public int sampleRowCount() return 20; } + @Derived + public String getIngestRunId() + { + return UUID.randomUUID().toString(); + } + //---------- FIELDS ---------- public abstract IngestMode ingestMode(); @@ -204,7 +210,6 @@ protected TransformOptions transformOptions() boolean mainDatasetExists; private Planner planner; private boolean datasetsInitialized = false; - private String ingestRunId; // ---------- API ---------- @@ -245,16 +250,7 @@ public void initExecutor(Executor executor) */ public Datasets initDatasets(Datasets datasets) { - String ingestRunId = UUID.randomUUID().toString(); - return enrichDatasetsAndGenerateOperations(datasets, ingestRunId); - } - - /* - - Initializes Datasets with a provided ingestRunId - */ - public Datasets initDatasets(Datasets datasets, String ingestRunId) - { - return enrichDatasetsAndGenerateOperations(datasets, ingestRunId); + return enrichDatasetsAndGenerateOperations(datasets); } /* @@ -376,11 +372,6 @@ public List getLatestStagingFilters(RelationalConnection connecti return ApiUtils.extractDatasetFilters(metadataDataset, executor, physicalPlan); } - public String getIngestRunId() - { - return this.ingestRunId; - } - // ---------- UTILITY METHODS ---------- private void validateDatasetsInitialization() @@ -616,7 +607,7 @@ private List performFullIngestion(RelationalConnection connectio } - private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets, String ingestRunId) + private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) { LOGGER.info("Initializing Datasets"); // Validation: init(Connection) must have been invoked @@ -625,9 +616,6 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets, String i throw new IllegalStateException("Executor not initialized, call init(Connection) before invoking this method!"); } - // 0. Set the run id - this.ingestRunId = ingestRunId; - // 1. 
Case handling enrichedIngestMode = ApiUtils.applyCase(ingestMode(), caseConversion()); enrichedDatasets = ApiUtils.enrichAndApplyCase(datasets, caseConversion()); @@ -693,7 +681,7 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets, String i .bulkLoadEventIdValue(bulkLoadEventIdValue()) .batchSuccessStatusValue(batchSuccessStatusValue()) .sampleRowCount(sampleRowCount()) - .ingestRunId(ingestRunId) + .ingestRunId(getIngestRunId()) .build(); planner = Planners.get(enrichedDatasets, enrichedIngestMode, generator.plannerOptions(), relationalSink().capabilities()); @@ -785,7 +773,7 @@ private Datasets importExternalDataset(Datasets datasets) DatasetReference mainDataSetReference = datasets.mainDataset().datasetReference(); externalDatasetReference = externalDatasetReference - .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : TableNameGenUtils.generateTableName(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING, ingestRunId)) + .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : TableNameGenUtils.generateTableName(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING, getIngestRunId())) .withDatabase(externalDatasetReference.database().isPresent() ? externalDatasetReference.database().get() : mainDataSetReference.database().orElse(null)) .withGroup(externalDatasetReference.group().isPresent() ? externalDatasetReference.group().get() : mainDataSetReference.group().orElse(null)) .withAlias(externalDatasetReference.alias().isPresent() ? externalDatasetReference.alias().get() : mainDataSetReference.alias().orElseThrow(RuntimeException::new) + UNDERSCORE + STAGING); From bea23261af23def0792dbdaa2a6c2596f4247a0e Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Wed, 13 Mar 2024 11:21:13 +0530 Subject: [PATCH 32/32] Fix regex in H2 Sink to catch the exceptions --- .../components/relational/api/ApiUtils.java | 18 ++++++++ .../components/relational/h2/H2Sink.java | 45 ++++++++++--------- .../relational/snowflake/SnowflakeSink.java | 30 ++++++++----- 3 files changed, 61 insertions(+), 32 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java index b00c67bd67a..b5955b52ccb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java @@ -42,6 +42,8 @@ import org.finos.legend.engine.persistence.components.util.MetadataDataset; import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; @@ -298,4 +300,20 @@ public static String buildErrorRecord(List allColumns, Map findToken(String message, String regex, int 
From bea23261af23def0792dbdaa2a6c2596f4247a0e Mon Sep 17 00:00:00 2001
From: prasar-ashutosh
Date: Wed, 13 Mar 2024 11:21:13 +0530
Subject: [PATCH 32/32] Fix regex in H2 Sink to catch the exceptions

---
 .../components/relational/api/ApiUtils.java   | 18 ++++++++
 .../components/relational/h2/H2Sink.java      | 45 ++++++++++---------
 .../relational/snowflake/SnowflakeSink.java   | 30 ++++++++-----
 3 files changed, 61 insertions(+), 32 deletions(-)

diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java
index b00c67bd67a..b5955b52ccb 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java
@@ -42,6 +42,8 @@ import org.finos.legend.engine.persistence.components.util.MetadataDataset;
 
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD;
@@ -298,4 +300,20 @@ public static String buildErrorRecord(List<String> allColumns, Map<String, Object> row)
+
+    public static Optional<String> findToken(String message, String regex, int group)
+    {
+        Optional<String> token = Optional.empty();
+        Matcher matcher = Pattern.compile(regex).matcher(message);
+        if (matcher.find())
+        {
+            token = Optional.of(matcher.group(group));
+        }
+        return token;
+    }
 }
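Note (not part of the patch): a quick sketch of how the new helper behaves; the sample message is invented, but the regex and group index mirror the ones wired into H2Sink below:

    // Group 1 captures the target type, group 2 the offending literal,
    // so this yields Optional[2024-13-45].
    Optional<String> value = ApiUtils.findToken(
        "Cannot parse \"TIMESTAMP\" constant \"2024-13-45\"",
        "Cannot parse \"(.*)\" constant \"(.*)\"", 2);

    // A message that matches nothing yields Optional.empty() rather than a
    // mangled fallback string, which is the point of the fix.
    Optional<String> none = ApiUtils.findToken("unrelated text", "Cannot parse \"(.*)\"", 1);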
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java
index 7ce873f7d90..1bffbd37bb8 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java
@@ -49,6 +49,8 @@ import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.LowerCaseOptimizer;
 import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer;
 import org.finos.legend.engine.persistence.components.relational.api.DataError;
+import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection;
+import org.finos.legend.engine.persistence.components.relational.api.ApiUtils;
 import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory;
 import org.finos.legend.engine.persistence.components.relational.api.IngestStatus;
 import org.finos.legend.engine.persistence.components.relational.api.IngestorResult;
@@ -79,7 +81,6 @@ import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper;
 import org.finos.legend.engine.persistence.components.relational.sql.TabularData;
 import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen;
-import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection;
 import org.finos.legend.engine.persistence.components.relational.executor.RelationalExecutor;
 import org.finos.legend.engine.persistence.components.util.PlaceholderValue;
 import org.finos.legend.engine.persistence.components.util.ValidationCategory;
@@ -276,15 +277,16 @@ public List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlP
     private List<DataError> parseH2Exceptions(Exception e)
     {
         String errorMessage = e.getMessage();
+        String errorMessageWithoutLineBreak = ApiUtils.removeLineBreaks(errorMessage);
 
         if (errorMessage.contains("IO Exception"))
         {
-            String fileName = extractProblematicValueFromErrorMessage(errorMessage);
-            Map<String, Object> errorDetails = buildErrorDetails(Optional.of(fileName), Optional.empty(), Optional.empty());
+            Optional<String> fileName = extractProblematicValueFromErrorMessage(errorMessage);
+            Map<String, Object> errorDetails = buildErrorDetails(fileName, Optional.empty(), Optional.empty());
             return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND).errorMessage(ErrorCategory.FILE_NOT_FOUND.getDefaultErrorMessage()).putAllErrorDetails(errorDetails).build());
         }
 
-        return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessage).build());
+        return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessageWithoutLineBreak).build());
     }
 
     public List<DataError> performDryRunWithValidationQueries(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion)
@@ -315,20 +317,22 @@ public List<DataError> performDryRunWithValidationQueries(Datasets datasets, Tra
         }
         catch (RuntimeException e)
         {
-            String problematicValue = extractProblematicValueFromErrorMessage(e.getCause().getMessage());
-
-            // This loop will only be executed once as there is always only one element in the set
-            for (FieldValue validatedColumn : pair.getOne())
+            Optional<String> problematicValue = extractProblematicValueFromErrorMessage(e.getCause().getMessage());
+            if (problematicValue.isPresent())
             {
-                List<TabularData> results = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(LogicalPlanFactory.getLogicalPlanForSelectAllFieldsWithStringFieldEquals(validatedColumn, problematicValue)), sampleRowCount);
-                if (!results.isEmpty())
+                // This loop will only be executed once as there is always only one element in the set
+                for (FieldValue validatedColumn : pair.getOne())
                 {
-                    List<Map<String, Object>> resultSets = results.get(0).getData();
-                    for (Map<String, Object> row : resultSets)
+                    List<TabularData> results = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(LogicalPlanFactory.getLogicalPlanForSelectAllFieldsWithStringFieldEquals(validatedColumn, problematicValue.get())), sampleRowCount);
+                    if (!results.isEmpty())
                     {
-                        DataError dataError = constructDataError(allFields, row, TYPE_CONVERSION, validatedColumn.fieldName(), caseConversion);
-                        dataErrorsByCategory.get(TYPE_CONVERSION).add(dataError);
-                        dataErrorsTotalCount++;
+                        List<Map<String, Object>> resultSets = results.get(0).getData();
+                        for (Map<String, Object> row : resultSets)
+                        {
+                            DataError dataError = constructDataError(allFields, row, TYPE_CONVERSION, validatedColumn.fieldName(), caseConversion);
+                            dataErrorsByCategory.get(TYPE_CONVERSION).add(dataError);
+                            dataErrorsTotalCount++;
+                        }
                     }
                 }
             }
@@ -339,21 +343,22 @@ public List<DataError> performDryRunWithValidationQueries(Datasets datasets, Tra
         return getDataErrorsWithFairDistributionAcrossCategories(sampleRowCount, dataErrorsTotalCount, dataErrorsByCategory);
     }
 
-    private String extractProblematicValueFromErrorMessage(String errorMessage)
+    private Optional<String> extractProblematicValueFromErrorMessage(String errorMessage)
     {
         errorMessage = errorMessage.substring(0, errorMessage.indexOf("; SQL statement"));
+        Optional<String> value = Optional.empty();
         if (errorMessage.contains("Data conversion error"))
         {
-            return errorMessage.replaceFirst("org.h2.jdbc.JdbcSQLDataException: Data conversion error converting ", "").replaceAll("\"", "");
+            value = ApiUtils.findToken(errorMessage, "Data conversion error converting \"(.*)\"", 1);
         }
         else if (errorMessage.contains("Cannot parse"))
         {
-            return errorMessage.replaceFirst("org.h2.jdbc.JdbcSQLDataException: Cannot parse \"(.*)\" constant ", "").replaceAll("\"", "");
+            value = ApiUtils.findToken(errorMessage, "Cannot parse \"(.*)\" constant \"(.*)\"", 2);
         }
         else if (errorMessage.contains("IO Exception"))
         {
-            return errorMessage.replaceFirst("org.h2.jdbc.JdbcSQLNonTransientException: IO Exception: \"IOException reading ", "").replaceAll("\"", "");
+            value = ApiUtils.findToken(errorMessage, "IO Exception: \"IOException reading (.*)\"", 1);
         }
-        return errorMessage;
+        return value;
     }
 }
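Note (not part of the patch): the behavioural change in H2Sink, as I read it, is that the old replaceFirst chain returned the whole error message whenever no known prefix matched, and that junk value was then fed into a follow-up validation query; returning Optional and guarding with isPresent() skips the query entirely when nothing can be extracted. A sketch under that assumption, with an invented message:

    // H2-style message, trimmed at "; SQL statement" as the method does.
    String msg = "org.h2.jdbc.JdbcSQLDataException: Data conversion error converting \"abc\"; SQL statement: SELECT 1";
    String trimmed = msg.substring(0, msg.indexOf("; SQL statement"));
    // Optional[abc]; an unrecognised message would give Optional.empty(),
    // so no dry-run lookup query is issued for it.
    Optional<String> value = ApiUtils.findToken(trimmed, "Data conversion error converting \"(.*)\"", 1);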
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java
index dadcd71e8ca..565010e49a6 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java
@@ -54,6 +54,7 @@ import org.finos.legend.engine.persistence.components.relational.api.IngestStatus;
 import org.finos.legend.engine.persistence.components.relational.api.IngestorResult;
 import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection;
+import org.finos.legend.engine.persistence.components.relational.api.ApiUtils;
 import org.finos.legend.engine.persistence.components.relational.executor.RelationalExecutor;
 import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection;
 import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper;
@@ -109,8 +110,6 @@ import java.util.Queue;
 import java.util.Set;
 import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN;
@@ -148,6 +147,7 @@ public class SnowflakeSink extends AnsiSqlSink
     private static final String FIELD_OPTIONALLY_ENCLOSED_BY = "FIELD_OPTIONALLY_ENCLOSED_BY";
     private static final String CATEGORY_CONVERSION = "conversion";
     private static final String CATEGORY_CHECK_CONSTRAINT = "check_constraint";
+    private static final String CATEGORY_OTHER = "other";
 
     static
     {
@@ -299,26 +299,21 @@ public List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlP
     private List<DataError> parseSnowflakeExceptions(Exception e)
     {
         String errorMessage = e.getMessage();
+        String errorMessageWithoutLineBreaks = ApiUtils.removeLineBreaks(e.getMessage());
 
         if (errorMessage.contains("Error parsing"))
         {
-            return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.PARSING_ERROR).errorMessage(errorMessage).build());
+            return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.PARSING_ERROR).errorMessage(errorMessageWithoutLineBreaks).build());
         }
 
         if (errorMessage.contains("file") && errorMessage.contains("was not found"))
         {
-            Optional<String> fileName = Optional.empty();
-            Matcher matcher = Pattern.compile("file '(.*)' was not found").matcher(errorMessage);
-            if (matcher.find())
-            {
-                fileName = Optional.of(matcher.group(1));
-            }
+            Optional<String> fileName = ApiUtils.findToken(errorMessage, "file '(.*)' was not found", 1);
             Map<String, Object> errorDetails = buildErrorDetails(fileName, Optional.empty(), Optional.empty());
-
-            return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND).errorMessage(errorMessage).putAllErrorDetails(errorDetails).build());
+            return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND).errorMessage(errorMessageWithoutLineBreaks).putAllErrorDetails(errorDetails).build());
         }
 
-        return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessage).build());
+        return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessageWithoutLineBreaks).build());
     }
 
     private List<DataError> performDryRunWithValidationMode(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, int sampleRowCount)
@@ -378,6 +373,17 @@ else if (snowflakeErrorCategory.equals(CATEGORY_CHECK_CONSTRAINT))
             return ErrorCategory.CHECK_OTHER_CONSTRAINT;
         }
     }
+    else if (snowflakeErrorCategory.equals(CATEGORY_OTHER))
+    {
+        if (errorMessage.contains("file") && errorMessage.contains("was not found"))
+        {
+            return ErrorCategory.FILE_NOT_FOUND;
+        }
+        else
+        {
+            return ErrorCategory.UNKNOWN;
+        }
+    }
     else
     {
         return ErrorCategory.UNKNOWN;