diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml
index 65f950b235a..e79f36c5d4e 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/pom.xml
@@ -42,5 +42,15 @@
             <artifactId>jackson-databind</artifactId>
         </dependency>
+
+        <dependency>
+            <groupId>org.eclipse.collections</groupId>
+            <artifactId>eclipse-collections-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.eclipse.collections</groupId>
+            <artifactId>eclipse-collections</artifactId>
+        </dependency>
+
     </dependencies>
 </project>
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorSqlType.java
similarity index 86%
rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java
rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorSqlType.java
index 60c193d7938..adda9e48e60 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorSqlType.java
@@ -14,8 +14,10 @@
 package org.finos.legend.engine.persistence.components.common;
 
-public enum DedupAndVersionErrorStatistics
+public enum DedupAndVersionErrorSqlType
 {
     MAX_DUPLICATES,
-    MAX_DATA_ERRORS;
+    DUPLICATE_ROWS,
+    MAX_DATA_ERRORS,
+    DATA_ERROR_ROWS;
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java
new file mode 100644
index 00000000000..44b156d092a
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java
@@ -0,0 +1,108 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection;
+import org.finos.legend.engine.persistence.components.logicalplan.values.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DeriveDataErrorRowsLogicalPlan implements VersioningStrategyVisitor<LogicalPlan>
+{
+    private List<String> primaryKeys;
+    private List<String> remainingColumns;
+    private Dataset tempStagingDataset;
+    private int sampleRowCount;
+
+    public static final String DATA_VERSION_ERROR_COUNT = "legend_persistence_error_count";
+
+    public DeriveDataErrorRowsLogicalPlan(List<String> primaryKeys, List<String> remainingColumns, Dataset tempStagingDataset, int sampleRowCount)
+    {
+        this.primaryKeys = primaryKeys;
+        this.remainingColumns = remainingColumns;
+        this.tempStagingDataset = tempStagingDataset;
+        this.sampleRowCount = sampleRowCount;
+    }
+
+    @Override
+    public LogicalPlan visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+    {
+        return null;
+    }
+
+    @Override
+    public LogicalPlan visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+    {
+        if (maxVersionStrategy.performStageVersioning())
+        {
+            return getLogicalPlanForDataErrors(maxVersionStrategy.versioningField());
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    @Override
+    public LogicalPlan visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+    {
+        if (allVersionsStrategyAbstract.performStageVersioning())
+        {
+            return getLogicalPlanForDataErrors(allVersionsStrategyAbstract.versioningField());
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    private LogicalPlan getLogicalPlanForDataErrors(String versionField)
+    {
+        List<Value> pKsAndVersion = new ArrayList<>();
+        for (String pk: primaryKeys)
+        {
+            pKsAndVersion.add(FieldValue.builder().fieldName(pk).build());
+        }
+        pKsAndVersion.add(FieldValue.builder().fieldName(versionField).build());
+
+        List<Value> distinctValueFields = new ArrayList<>();
+        for (String field: remainingColumns)
+        {
+            distinctValueFields.add(FieldValue.builder().fieldName(field).build());
+        }
+
+        FunctionImpl countDistinct = FunctionImpl.builder()
+            .functionName(FunctionName.COUNT)
+            .addValue(FunctionImpl.builder().functionName(FunctionName.DISTINCT).addAllValue(distinctValueFields).build())
+            .alias(DATA_VERSION_ERROR_COUNT)
+            .build();
+
+        Selection selectDataError = Selection.builder()
+            .source(tempStagingDataset)
+            .groupByFields(pKsAndVersion)
+            .addAllFields(pKsAndVersion)
+            .addFields(countDistinct)
+            .havingCondition(GreaterThan.of(FieldValue.builder().fieldName(DATA_VERSION_ERROR_COUNT).build(), ObjectValue.of(1)))
+            .limit(sampleRowCount)
+            .build();
+
+        return LogicalPlan.builder().addOps(selectDataError).build();
+    }
+
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveMaxDataErrorLogicalPlan.java
similarity index 92%
rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java
rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveMaxDataErrorLogicalPlan.java
index 86cd4c59e4e..e5c1e2faf78 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveMaxDataErrorLogicalPlan.java
@@ -14,7 +14,7 @@
 package org.finos.legend.engine.persistence.components.ingestmode.versioning;
 
-import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics;
+import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType;
 import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.*;
 import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue;
@@ -25,14 +25,14 @@
 import java.util.ArrayList;
 import java.util.List;
 
-public class DeriveDataErrorCheckLogicalPlan implements VersioningStrategyVisitor<LogicalPlan>
+public class DeriveMaxDataErrorLogicalPlan implements VersioningStrategyVisitor<LogicalPlan>
 {
     List<String> primaryKeys;
     List<String> remainingColumns;
     Dataset tempStagingDataset;
 
-    public DeriveDataErrorCheckLogicalPlan(List<String> primaryKeys, List<String> remainingColumns, Dataset tempStagingDataset)
+    public DeriveMaxDataErrorLogicalPlan(List<String> primaryKeys, List<String> remainingColumns, Dataset tempStagingDataset)
     {
         this.primaryKeys = primaryKeys;
         this.remainingColumns = remainingColumns;
@@ -73,7 +73,7 @@ public LogicalPlan visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersi
     private LogicalPlan getLogicalPlanForDataErrorCheck(String versionField)
     {
-        String maxDataErrorAlias = DedupAndVersionErrorStatistics.MAX_DATA_ERRORS.name();
+        String maxDataErrorAlias = DedupAndVersionErrorSqlType.MAX_DATA_ERRORS.name();
         String distinctRowCount = "legend_persistence_distinct_rows";
         List<Value> pKsAndVersion = new ArrayList<>();
         for (String pk: primaryKeys)
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java
index 9199836cb2d..a20ddfd49be 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java
@@ -15,6 +15,7 @@
 package org.finos.legend.engine.persistence.components.logicalplan;
 
 import org.finos.legend.engine.persistence.components.common.Datasets;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection;
@@ -116,4 +117,14 @@ public static LogicalPlan getLogicalPlanForMaxOfField(Dataset dataset, String fi
             .source(dataset).build();
         return LogicalPlan.builder().addOps(selection).build();
     }
+
+    public static LogicalPlan getLogicalPlanForSelectAllFieldsWithStringFieldEquals(FieldValue field, String fieldValue)
+    {
+        Selection selection = Selection.builder()
+            .addFields(All.INSTANCE)
+            .source(field.datasetRef())
+            .condition(Equals.of(field, StringValue.of(fieldValue)))
+            .build();
+        return LogicalPlan.builder().addOps(selection).build();
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java
index 917525b2fc8..60530bc2288 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java
@@ -47,6 +47,8 @@ public interface SelectionAbstract extends Dataset, Operation
 
     Optional<List<Value>> groupByFields();
 
+    Optional<Condition> havingCondition();
+
     Optional<String> alias();
 
     Optional<Long> limit();
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java
index 8dae01e0dc5..b6270d52b60 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java
@@ -24,6 +24,12 @@
 
     List<String> filePatterns();
 
+    @Value.Derived
+    default boolean validationModeSupported()
+    {
+        return false;
+    }
+
     @Value.Check
     default void validate()
     {
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java
index 82e5876ed42..cadd43c5839 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java
@@ -21,7 +21,6 @@
 import java.util.List;
 
 import static org.immutables.value.Value.Immutable;
-import static org.immutables.value.Value.Parameter;
 import static org.immutables.value.Value.Style;
 
@@ -34,15 +33,17 @@
 )
 public interface CopyAbstract extends Operation
 {
-    @Parameter(order = 0)
     Dataset targetDataset();
 
-    @Parameter(order = 1)
     Dataset sourceDataset();
 
-    @Parameter(order = 2)
     List<Value> fields();
 
-    @Parameter(order = 3)
     StagedFilesDatasetProperties stagedFilesDatasetProperties();
+
+    @org.immutables.value.Value.Default
+    default boolean validationMode()
+    {
+        return false;
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java
new file mode 100644
index 00000000000..95e1a9dff6d
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java
@@ -0,0 +1,33 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.logicalplan.values;
+
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties;
+
+import static org.immutables.value.Value.Immutable;
+import static org.immutables.value.Value.Style;
+
+@Immutable
+@Style(
+    typeAbstract = "*Abstract",
+    typeImmutable = "*",
+    jdkOnly = true,
+    optionalAcceptNullable = true,
+    strictBuilder = true
+)
+public interface MetadataFileNameFieldAbstract extends Value
+{
+    StagedFilesDatasetProperties stagedFilesDatasetProperties();
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java
new file mode 100644
index 00000000000..23f0e7fdfa6
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java
@@ -0,0 +1,33 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.logicalplan.values;
+
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties;
+
+import static org.immutables.value.Value.Immutable;
+import static org.immutables.value.Value.Style;
+
+@Immutable
+@Style(
+    typeAbstract = "*Abstract",
+    typeImmutable = "*",
+    jdkOnly = true,
+    optionalAcceptNullable = true,
+    strictBuilder = true
+)
+public interface MetadataRowNumberFieldAbstract extends Value
+{
+    StagedFilesDatasetProperties stagedFilesDatasetProperties();
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java
new file mode 100644
index 00000000000..11222de8584
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java
@@ -0,0 +1,38 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.logicalplan.values;
+
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType;
+
+import static org.immutables.value.Value.Immutable;
+import static org.immutables.value.Value.Parameter;
+import static org.immutables.value.Value.Style;
+
+@Immutable
+@Style(
+    typeAbstract = "*Abstract",
+    typeImmutable = "*",
+    jdkOnly = true,
+    optionalAcceptNullable = true,
+    strictBuilder = true
+)
+public interface TryCastFunctionAbstract extends Value
+{
+    @Parameter(order = 0)
+    Value field();
+
+    @Parameter(order = 1)
+    FieldType type();
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java
index ad882e365b8..9a9b9b57a06 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java
@@ -14,6 +14,8 @@
 package org.finos.legend.engine.persistence.components.planner;
 
+import org.eclipse.collections.api.tuple.Pair;
+import org.eclipse.collections.impl.tuple.Tuples;
 import org.finos.legend.engine.persistence.components.common.Datasets;
 import org.finos.legend.engine.persistence.components.common.Resources;
 import org.finos.legend.engine.persistence.components.common.StatisticName;
@@ -21,16 +23,26 @@
 import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors;
 import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenerationHandler;
 import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.And;
 import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.IsNull;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.Not;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.Or;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.ExternalDataset;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection;
+import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete;
 import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop;
 import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert;
 import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue;
 import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl;
 import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName;
 import org.finos.legend.engine.persistence.components.logicalplan.values.All;
+import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField;
+import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField;
 import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection;
@@ -38,26 +50,34 @@
 import org.finos.legend.engine.persistence.components.logicalplan.operations.Create;
 import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy;
 import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation;
+import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction;
 import org.finos.legend.engine.persistence.components.logicalplan.values.Value;
 import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp;
 import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue;
+import org.finos.legend.engine.persistence.components.util.ValidationCategory;
 import org.finos.legend.engine.persistence.components.util.Capability;
 import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils;
+import org.finos.legend.engine.persistence.components.util.TableNameGenUtils;
 
 import java.util.*;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED;
-import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_DATASET_BASE_NAME;
-import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.UNDERSCORE;
+import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_DATASET_ALIAS;
+import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_DATASET_QUALIFIER;
 
 class BulkLoadPlanner extends Planner
 {
 
     private boolean transformWhileCopy;
     private Dataset externalDataset;
+    private Dataset validationDataset;
     private StagedFilesDataset stagedFilesDataset;
 
+    private static final String FILE = "legend_persistence_file";
+    private static final String ROW_NUMBER = "legend_persistence_row_number";
+
     BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions, Set<Capability> capabilities)
     {
         super(datasets, ingestMode, plannerOptions, capabilities);
@@ -74,14 +94,20 @@
         transformWhileCopy = capabilities.contains(Capability.TRANSFORM_WHILE_COPY);
         if (!transformWhileCopy)
         {
+            String externalDatasetName = TableNameGenUtils.generateTableName(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)), TEMP_DATASET_QUALIFIER, options().ingestRunId());
             externalDataset = ExternalDataset.builder()
                 .stagedFilesDataset(stagedFilesDataset)
                 .database(datasets.mainDataset().datasetReference().database())
                 .group(datasets.mainDataset().datasetReference().group())
-                .name(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)) + UNDERSCORE + TEMP_DATASET_BASE_NAME)
-                .alias(TEMP_DATASET_BASE_NAME)
+                .name(externalDatasetName)
+                .alias(TEMP_DATASET_ALIAS)
                 .build();
         }
+
+        if (capabilities.contains(Capability.DRY_RUN))
+        {
+            validationDataset = stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported() ? getValidationModeDataset() : getGenericValidationDataset();
+        }
     }
 
     private void validateNoPrimaryKeysInStageAndMain()
@@ -112,6 +138,137 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources)
         }
     }
 
+    /*
+    ------------------
+    Validation Mode Logic:
+    ------------------
+    COPY INTO temp_table (data_columns)
+    SELECT data_columns from staging
+    WITH VALIDATION_MODE = true
+
+    ------------------
+    Generic Approach Logic:
+    ------------------
+    modified_data_columns: nullable data_columns with String datatype
+    meta_columns: file_name, row_number
+
+    COPY INTO temp_table (modified_data_columns, meta_columns)
+    SELECT modified_data_columns, meta_columns from staging
+    */
+    @Override
+    public LogicalPlan buildLogicalPlanForDryRun(Resources resources)
+    {
+        if (!capabilities.contains(Capability.DRY_RUN))
+        {
+            return LogicalPlan.of(Collections.emptyList());
+        }
+
+        List<Operation> operations = new ArrayList<>();
+
+        if (stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported())
+        {
+            Copy copy = Copy.builder()
+                .targetDataset(validationDataset)
+                .sourceDataset(stagedFilesDataset.datasetReference().withAlias(""))
+                .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties())
+                .validationMode(true)
+                .build();
+            operations.add(copy);
+        }
+        else
+        {
+            operations.add(Delete.builder().dataset(validationDataset).build());
+
+            List<Value> fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValuesWithVarCharType(stagingDataset());
+            fieldsToSelect.add(MetadataFileNameField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build());
+            fieldsToSelect.add(MetadataRowNumberField.builder().stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties()).build());
+
+            List<Value> fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues());
+            fieldsToInsert.add(FieldValue.builder().fieldName(FILE).datasetRef(stagingDataset().datasetReference()).build());
+            fieldsToInsert.add(FieldValue.builder().fieldName(ROW_NUMBER).datasetRef(stagingDataset().datasetReference()).build());
+
+            Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build();
+
+            Copy copy = Copy.builder()
+                .targetDataset(validationDataset)
+                .sourceDataset(selectStage)
+                .addAllFields(fieldsToInsert)
+                .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties())
+                .validationMode(false)
+                .build();
+            operations.add(copy);
+        }
+        return LogicalPlan.of(operations);
+    }
+
+    /*
+    ------------------
+    Validation Mode Logic:
+    ------------------
+    NOT APPLICABLE
+
+    ------------------
+    Generic Approach Logic:
+    ------------------
+    For null values:
+    SELECT * FROM temp_table WHERE
+    (non_nullable_data_column_1 = NULL
+    OR non_nullable_data_column_2 = NULL
+    OR ...)
+
+    For datatype conversion:
+    SELECT * FROM temp_table WHERE (non_string_data_column_1 != NULL AND TRY_CAST(non_string_data_column_1 AS datatype) = NULL)
+    SELECT * FROM temp_table WHERE (non_string_data_column_2 != NULL AND TRY_CAST(non_string_data_column_2 AS datatype) = NULL)
+    ...
+    */
+    public Map<ValidationCategory, List<Pair<Set<FieldValue>, LogicalPlan>>> buildLogicalPlanForDryRunValidation(Resources resources)
+    {
+        if (!capabilities.contains(Capability.DRY_RUN) || stagedFilesDataset.stagedFilesDatasetProperties().validationModeSupported())
+        {
+            return Collections.emptyMap();
+        }
+        Map<ValidationCategory, List<Pair<Set<FieldValue>, LogicalPlan>>> validationMap = new HashMap<>();
+        List<Field> fieldsToCheckForNull = stagingDataset().schema().fields().stream().filter(field -> !field.nullable()).collect(Collectors.toList());
+        List<Field> fieldsToCheckForDatatype = stagingDataset().schema().fields().stream().filter(field -> !DataType.isStringDatatype(field.type().dataType())).collect(Collectors.toList());
+
+        if (!fieldsToCheckForNull.isEmpty())
+        {
+            Selection queryForNull = Selection.builder()
+                .source(validationDataset)
+                .condition(Or.of(fieldsToCheckForNull.stream().map(field ->
+                    IsNull.of(FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()))
+                    .collect(Collectors.toList())))
+                .limit(options().sampleRowCount())
+                .build();
+
+            validationMap.put(ValidationCategory.NULL_VALUE,
+                Collections.singletonList(Tuples.pair(fieldsToCheckForNull.stream().map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()),
+                    LogicalPlan.of(Collections.singletonList(queryForNull)))));
+        }
+
+        if (!fieldsToCheckForDatatype.isEmpty())
+        {
+            validationMap.put(ValidationCategory.TYPE_CONVERSION, new ArrayList<>());
+
+            for (Field fieldToCheckForDatatype : fieldsToCheckForDatatype)
+            {
+                Selection queryForDatatype = Selection.builder()
+                    .source(validationDataset)
+                    .condition(And.builder()
+                        .addConditions(Not.of(IsNull.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(validationDataset.datasetReference()).build())))
+                        .addConditions(IsNull.of(TryCastFunction.of(FieldValue.builder().fieldName(fieldToCheckForDatatype.name()).datasetRef(validationDataset.datasetReference()).build(), fieldToCheckForDatatype.type())))
+                        .build())
+                    .limit(options().sampleRowCount())
+                    .build();
+
+                validationMap.get(ValidationCategory.TYPE_CONVERSION).add(Tuples.pair(Stream.of(fieldToCheckForDatatype).map(field -> FieldValue.builder().fieldName(field.name()).datasetRef(validationDataset.datasetReference()).build()).collect(Collectors.toSet()),
+                    LogicalPlan.of(Collections.singletonList(queryForDatatype))));
+            }
+        }
+
+        return validationMap;
+    }
+
     private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources)
     {
         List<Value> fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset());
@@ -131,7 +288,13 @@ private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources)
         }
 
         Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build();
-        return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert, stagedFilesDataset.stagedFilesDatasetProperties())));
+        return LogicalPlan.of(Collections.singletonList(
+            Copy.builder()
+                .targetDataset(mainDataset())
+                .sourceDataset(selectStage)
+                .addAllFields(fieldsToInsert)
+                .stagedFilesDatasetProperties(stagedFilesDataset.stagedFilesDatasetProperties())
+                .build()));
     }
 
     private LogicalPlan buildLogicalPlanForCopyAndTransform(Resources resources)
@@ -176,6 +339,17 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources)
         return LogicalPlan.of(operations);
     }
 
+    @Override
+    public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources)
+    {
+        List<Operation> operations = new ArrayList<>();
+        if (capabilities.contains(Capability.DRY_RUN))
+        {
+            operations.add(Create.of(true, validationDataset));
+        }
+        return LogicalPlan.of(operations);
+    }
+
     @Override
     public LogicalPlan buildLogicalPlanForPostActions(Resources resources)
     {
@@ -195,6 +369,17 @@ public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources)
         return LogicalPlan.of(operations);
     }
 
+    @Override
+    public LogicalPlan buildLogicalPlanForDryRunPostCleanup(Resources resources)
+    {
+        List<Operation> operations = new ArrayList<>();
+        if (capabilities.contains(Capability.DRY_RUN))
+        {
+            operations.add(Drop.of(true, validationDataset, false));
+        }
+        return LogicalPlan.of(operations);
+    }
+
     @Override
     List<String> getDigestOrRemainingColumns()
     {
@@ -248,4 +433,33 @@ protected void addPostRunStatsForRowsDeleted(Map<StatisticName, LogicalPlan> pos
     {
         // Not supported at the moment
     }
+
+    private Dataset getValidationModeDataset()
+    {
+        String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new));
+        String validationDatasetName = TableNameGenUtils.generateTableName(tableName, "validation", options().ingestRunId());
+        return DatasetDefinition.builder()
+            .schema(stagedFilesDataset.schema())
+            .database(mainDataset().datasetReference().database())
+            .group(mainDataset().datasetReference().group())
+            .name(validationDatasetName)
+            .build();
+    }
+
+    private Dataset getGenericValidationDataset()
+    {
+        String tableName = mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new));
+        String validationDatasetName = TableNameGenUtils.generateTableName(tableName, "validation", options().ingestRunId());
+
+        List<Field> fields = stagedFilesDataset.schema().fields().stream().map(field -> field.withType(FieldType.builder().dataType(DataType.VARCHAR).build()).withNullable(true)).collect(Collectors.toList());
+        fields.add(Field.builder().name(FILE).type(FieldType.builder().dataType(DataType.VARCHAR).build()).build());
+        fields.add(Field.builder().name(ROW_NUMBER).type(FieldType.builder().dataType(DataType.BIGINT).build()).build());
+
+        return DatasetDefinition.builder()
+            .schema(stagedFilesDataset.schema().withFields(fields))
+            .database(mainDataset().datasetReference().database())
+            .group(mainDataset().datasetReference().group())
+            .name(validationDatasetName)
+            .build();
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java
index 65c3f9fe32e..fd6878c393f 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java
@@ -18,9 +18,10 @@
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
 
+import org.eclipse.collections.api.tuple.Pair;
 import org.finos.legend.engine.persistence.components.common.Datasets;
+import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType;
 import org.finos.legend.engine.persistence.components.common.Resources;
-import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics;
 import org.finos.legend.engine.persistence.components.common.StatisticName;
 import org.finos.legend.engine.persistence.components.ingestmode.IngestMode;
 import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitor;
@@ -32,6 +33,7 @@
 import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan;
 import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory;
 import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field;
@@ -43,6 +45,7 @@
 import org.finos.legend.engine.persistence.components.logicalplan.values.All;
 import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp;
 import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp;
+import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue;
 import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl;
 import org.finos.legend.engine.persistence.components.logicalplan.values.ObjectValue;
 import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue;
@@ -52,6 +55,7 @@
 import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils;
 import org.finos.legend.engine.persistence.components.util.MetadataDataset;
 import org.finos.legend.engine.persistence.components.util.MetadataUtils;
+import org.finos.legend.engine.persistence.components.util.ValidationCategory;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -60,6 +64,7 @@
 import java.util.Set;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.UUID;
 
 import static org.finos.legend.engine.persistence.components.common.StatisticName.INCOMING_RECORD_COUNT;
 import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_DELETED;
@@ -124,6 +129,18 @@
         default String batchSuccessStatusValue()
         {
             return MetadataUtils.MetaTableStatus.DONE.toString();
         }
+
+        @Default
+        default int sampleRowCount()
+        {
+            return 20;
+        }
+
+        @Default
+        default String ingestRunId()
+        {
+            return UUID.randomUUID().toString();
+        }
     }
 
     private final Datasets datasets;
@@ -172,7 +189,7 @@ private Optional<Dataset> getTempStagingDataset()
         Optional<Dataset> tempStagingDataset = Optional.empty();
         if (isTempTableNeededForStaging)
         {
-            tempStagingDataset = Optional.of(LogicalPlanUtils.getTempStagingDatasetDefinition(originalStagingDataset(), ingestMode));
+            tempStagingDataset = Optional.of(LogicalPlanUtils.getTempStagingDatasetDefinition(originalStagingDataset(), ingestMode, options().ingestRunId()));
         }
         return tempStagingDataset;
     }
@@ -256,6 +273,26 @@ protected PlannerOptions options()
 
     public abstract LogicalPlan buildLogicalPlanForIngest(Resources resources);
 
+    public LogicalPlan buildLogicalPlanForDryRun(Resources resources)
+    {
+        return LogicalPlan.of(Collections.emptyList());
+    }
+
+    public Map<ValidationCategory, List<Pair<Set<FieldValue>, LogicalPlan>>> buildLogicalPlanForDryRunValidation(Resources resources)
+    {
+        return Collections.emptyMap();
+    }
+
+    public LogicalPlan buildLogicalPlanForDryRunPreActions(Resources resources)
+    {
+        return LogicalPlan.of(Collections.emptyList());
+    }
+
+    public LogicalPlan buildLogicalPlanForDryRunPostCleanup(Resources resources)
+    {
+        return LogicalPlan.of(Collections.emptyList());
+    }
+
     public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources)
     {
         // Save staging filters into batch_source_info column
@@ -380,41 +417,65 @@ public Map<StatisticName, LogicalPlan> buildLogicalPlanForPostRunStatistics(Reso
         return postRunStatisticsResult;
     }
 
-    public Map<DedupAndVersionErrorStatistics, LogicalPlan> buildLogicalPlanForDeduplicationAndVersioningErrorChecks(Resources resources)
+    public Map<DedupAndVersionErrorSqlType, LogicalPlan> buildLogicalPlanForDeduplicationAndVersioningErrorChecks(Resources resources)
     {
-        Map<DedupAndVersionErrorStatistics, LogicalPlan> dedupAndVersioningErrorChecks = new HashMap<>();
+        Map<DedupAndVersionErrorSqlType, LogicalPlan> dedupAndVersioningErrorChecks = new HashMap<>();
         addMaxDuplicatesErrorCheck(dedupAndVersioningErrorChecks);
         addDataErrorCheck(dedupAndVersioningErrorChecks);
         return dedupAndVersioningErrorChecks;
     }
 
-    protected void addMaxDuplicatesErrorCheck(Map<DedupAndVersionErrorStatistics, LogicalPlan> dedupAndVersioningErrorChecks)
+    protected void addMaxDuplicatesErrorCheck(Map<DedupAndVersionErrorSqlType, LogicalPlan> dedupAndVersioningErrorChecks)
     {
         if (ingestMode.deduplicationStrategy() instanceof FailOnDuplicates)
         {
+            FieldValue count = FieldValue.builder().datasetRef(tempStagingDataset().datasetReference()).fieldName(COUNT).build();
             FunctionImpl maxCount = FunctionImpl.builder()
                 .functionName(FunctionName.MAX)
-                .addValue(FieldValue.builder().datasetRef(tempStagingDataset().datasetReference()).fieldName(COUNT).build())
-                .alias(DedupAndVersionErrorStatistics.MAX_DUPLICATES.name())
+                .addValue(count)
+                .alias(DedupAndVersionErrorSqlType.MAX_DUPLICATES.name())
                 .build();
             Selection selectMaxDupsCount = Selection.builder()
                 .source(tempStagingDataset())
                 .addFields(maxCount)
                 .build();
 
             LogicalPlan maxDuplicatesCountPlan = LogicalPlan.builder().addOps(selectMaxDupsCount).build();
-            dedupAndVersioningErrorChecks.put(DedupAndVersionErrorStatistics.MAX_DUPLICATES, maxDuplicatesCountPlan);
+            dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.MAX_DUPLICATES, maxDuplicatesCountPlan);
+
+            /*
+            select pks from tempStagingDataset where COUNT > 1
+            */
+            List<Value> rowsToSelect = this.primaryKeys.stream().map(field -> FieldValue.builder().fieldName(field).build()).collect(Collectors.toList());
+            if (rowsToSelect.size() > 0)
+            {
+                rowsToSelect.add(FieldValue.builder().fieldName(COUNT).build());
+                Selection selectDuplicatesRows = Selection.builder()
+                    .source(tempStagingDataset())
+                    .addAllFields(rowsToSelect)
+                    .condition(GreaterThan.of(count, ObjectValue.of(1)))
+                    .limit(options().sampleRowCount())
+                    .build();
+                LogicalPlan selectDuplicatesRowsPlan = LogicalPlan.builder().addOps(selectDuplicatesRows).build();
+                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.DUPLICATE_ROWS, selectDuplicatesRowsPlan);
+            }
         }
     }
 
-    protected void addDataErrorCheck(Map<DedupAndVersionErrorStatistics, LogicalPlan> dedupAndVersioningErrorChecks)
+    protected void addDataErrorCheck(Map<DedupAndVersionErrorSqlType, LogicalPlan> dedupAndVersioningErrorChecks)
     {
         List<String> remainingColumns = getDigestOrRemainingColumns();
         if (ingestMode.versioningStrategy().accept(VersioningVisitors.IS_TEMP_TABLE_NEEDED))
         {
-            LogicalPlan logicalPlan = ingestMode.versioningStrategy().accept(new DeriveDataErrorCheckLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset()));
-            if (logicalPlan != null)
+            LogicalPlan logicalPlanForDataErrorCheck = ingestMode.versioningStrategy().accept(new DeriveMaxDataErrorLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset()));
+            if (logicalPlanForDataErrorCheck != null)
+            {
+                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS, logicalPlanForDataErrorCheck);
+            }
+
+            LogicalPlan logicalPlanForDataErrors = ingestMode.versioningStrategy().accept(new DeriveDataErrorRowsLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset(), options().sampleRowCount()));
+            if (logicalPlanForDataErrors != null)
             {
-                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS, logicalPlan);
+                dedupAndVersioningErrorChecks.put(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS, logicalPlanForDataErrors);
             }
         }
     }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java
index f99f9f94a23..438dd9219ac 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java
@@ -22,5 +22,6 @@ public enum Capability
     EXPLICIT_DATA_TYPE_CONVERSION,
     DATA_TYPE_LENGTH_CHANGE,
     DATA_TYPE_SCALE_CHANGE,
-    TRANSFORM_WHILE_COPY;
+    TRANSFORM_WHILE_COPY,
+    DRY_RUN
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java
index ba83a307734..da880eee09c 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java
@@ -62,7 +62,6 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
-import java.util.UUID;
 import java.util.stream.Collectors;
 
 import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.BIGINT;
@@ -72,10 +71,13 @@
 import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.FLOAT;
 import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.INT;
 import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.INTEGER;
+import static org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.VARCHAR;
 import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_BULK_LOAD_EVENT_ID;
 import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_FILE_PATHS;
 import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_FILE_PATTERNS;
 import static org.finos.legend.engine.persistence.components.util.MetadataUtils.BATCH_SOURCE_INFO_STAGING_FILTERS;
+import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_STAGING_DATASET_ALIAS;
+import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_STAGING_DATASET_QUALIFIER;
 
 public class LogicalPlanUtils
@@ -86,19 +88,12 @@
     public static final String DATA_SPLIT_UPPER_BOUND_PLACEHOLDER = "{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}";
     public static final String UNDERSCORE = "_";
     public static final String TEMP_DATASET_BASE_NAME = "legend_persistence_temp";
-    public static final String TEMP_STAGING_DATASET_BASE_NAME = "legend_persistence_temp_staging";
     public static final String TEMP_DATASET_WITH_DELETE_INDICATOR_BASE_NAME = "legend_persistence_tempWithDeleteIndicator";
 
     private LogicalPlanUtils()
     {
     }
 
-    public static String generateTableNameWithSuffix(String tableName, String suffix)
-    {
-        UUID uuid = UUID.randomUUID();
-        return tableName + UNDERSCORE + suffix + UNDERSCORE + uuid;
-    }
-
     public static Value INFINITE_BATCH_ID()
     {
         return InfiniteBatchIdValue.builder().build();
@@ -408,19 +403,35 @@ public static List<Value> extractStagedFilesFieldValues(Dataset dataset)
         int iter = 1;
         for (Field field : dataset.schema().fields())
         {
-            StagedFilesFieldValue fieldValue = StagedFilesFieldValue.builder()
-                .columnNumber(columnNumbersPresent ? field.columnNumber().get() : iter++)
-                .datasetRefAlias(dataset.datasetReference().alias())
-                .alias(field.fieldAlias().isPresent() ? field.fieldAlias().get() : field.name())
-                .elementPath(field.elementPath())
-                .fieldType(field.type())
-                .fieldName(field.name())
-                .build();
-            stagedFilesFields.add(fieldValue);
+            stagedFilesFields.add(getStagedFilesFieldValueWithType(dataset, field, field.type(), columnNumbersPresent, iter++));
+        }
+        return stagedFilesFields;
+    }
+
+    public static List<Value> extractStagedFilesFieldValuesWithVarCharType(Dataset dataset)
+    {
+        List<Value> stagedFilesFields = new ArrayList<>();
+        boolean columnNumbersPresent = dataset.schema().fields().stream().allMatch(field -> field.columnNumber().isPresent());
+        int iter = 1;
+        for (Field field : dataset.schema().fields())
+        {
+            stagedFilesFields.add(getStagedFilesFieldValueWithType(dataset, field, FieldType.builder().dataType(VARCHAR).build(), columnNumbersPresent, iter++));
         }
         return stagedFilesFields;
     }
 
+    public static StagedFilesFieldValue getStagedFilesFieldValueWithType(Dataset dataset, Field field, FieldType fieldType, boolean columnNumbersPresent, int counter)
+    {
+        return StagedFilesFieldValue.builder()
+            .columnNumber(columnNumbersPresent ? field.columnNumber().get() : counter)
+            .datasetRefAlias(dataset.datasetReference().alias())
+            .alias(field.fieldAlias().isPresent() ? field.fieldAlias().get() : field.name())
+            .elementPath(field.elementPath())
+            .fieldType(fieldType)
+            .fieldName(field.name())
+            .build();
+    }
+
     public static Dataset getTempDataset(Datasets datasets)
     {
         String mainDatasetName = datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new));
@@ -455,10 +466,10 @@ public static Dataset getTempDatasetWithDeleteIndicator(Datasets datasets, Strin
         }
     }
 
-    public static Dataset getTempStagingDatasetDefinition(Dataset stagingDataset, IngestMode ingestMode)
+    public static Dataset getTempStagingDatasetDefinition(Dataset stagingDataset, IngestMode ingestMode, String ingestRunId)
     {
-        String alias = stagingDataset.datasetReference().alias().orElse(TEMP_STAGING_DATASET_BASE_NAME);
-        String datasetName = stagingDataset.datasetReference().name().orElseThrow(IllegalStateException::new) + UNDERSCORE + TEMP_STAGING_DATASET_BASE_NAME;
+        String alias = stagingDataset.datasetReference().alias().orElse(TEMP_STAGING_DATASET_ALIAS);
+        String datasetName = TableNameGenUtils.generateTableName(stagingDataset.datasetReference().name().orElseThrow(IllegalStateException::new), TEMP_STAGING_DATASET_QUALIFIER, ingestRunId);
         SchemaDefinition tempStagingSchema = ingestMode.versioningStrategy().accept(new DeriveTempStagingSchemaDefinition(stagingDataset.schema(), ingestMode.deduplicationStrategy()));
         return DatasetDefinition.builder()
             .schema(tempStagingSchema)
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java
new file mode 100644
index 00000000000..27911dab26e
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtils.java
@@ -0,0 +1,40 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.util;
+
+import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.UNDERSCORE;
+
+public class TableNameGenUtils
+{
+    public static String LEGEND_PERSISTENCE_MARKER = "lp";
+    public static final String TEMP_DATASET_QUALIFIER = "temp";
+    public static final String TEMP_DATASET_ALIAS = "legend_persistence_temp";
+    public static final String TEMP_STAGING_DATASET_QUALIFIER = "temp_staging";
+    public static final String TEMP_STAGING_DATASET_ALIAS = "legend_persistence_temp_staging";
+
+    private static String generateTableSuffix(String ingestRunId)
+    {
+        int hashCode = Math.abs(ingestRunId.hashCode());
+        return LEGEND_PERSISTENCE_MARKER + UNDERSCORE + Integer.toString(hashCode, 36);
+    }
+
+    /*
+    Table name = <base_table_name>_<qualifier>_lp_<base-36 hash of ingestRunId>
+    */
+    public static String generateTableName(String baseTableName, String qualifier, String ingestRunId)
+    {
+        return baseTableName + UNDERSCORE + qualifier + UNDERSCORE + generateTableSuffix(ingestRunId);
+    }
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java
new file mode 100644
index 00000000000..c2fbb123b81
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/ValidationCategory.java
@@ -0,0 +1,21 @@
+// Copyright 2024 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.util;
+
+public enum ValidationCategory
+{
+    NULL_VALUE,
+    TYPE_CONVERSION
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java
index 543a2076f4f..3ce2231fdc7 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java
@@ -31,6 +31,8 @@ public interface Executor
 
     List<R> executePhysicalPlanAndGetResults(P physicalPlan);
 
+    List<R> executePhysicalPlanAndGetResults(P physicalPlan, int rows);
+
     List<R> executePhysicalPlanAndGetResults(P physicalPlan, Map<String, PlaceholderValue> placeholderKeyValues);
 
     boolean datasetExists(Dataset dataset);
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java
index c347996f81f..3a6ae90fe35 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/RelationalExecutionHelper.java
@@ -42,6 +42,8 @@ public interface RelationalExecutionHelper
 
     void executeStatements(List<String> sqls);
 
+    List<Map<String, Object>> executeQuery(String sql, int rows);
+
     List<Map<String, Object>> executeQuery(String sql);
 
     void close();
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml
index aa9b6d78a19..bcc2389bce9 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/pom.xml
@@ -51,6 +51,12 @@
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-databind</artifactId>
         </dependency>
+
+        <dependency>
+            <groupId>org.eclipse.collections</groupId>
+            <artifactId>eclipse-collections-api</artifactId>
+        </dependency>
+
         <dependency>
             <groupId>org.finos.legend.engine</groupId>
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java
index 972f6179de7..f6858734490 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java
@@ -14,6 +14,8 @@
 
 package org.finos.legend.engine.persistence.components.relational.ansi;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import org.finos.legend.engine.persistence.components.common.Datasets;
 import org.finos.legend.engine.persistence.components.common.StatisticName;
 import org.finos.legend.engine.persistence.components.executor.Executor;
@@ -147,26 +149,34 @@
 import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.TabularValuesVisitor;
 import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.TruncateVisitor;
 import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.WindowFunctionVisitor;
+import org.finos.legend.engine.persistence.components.relational.api.DataError;
+import org.finos.legend.engine.persistence.components.relational.api.ApiUtils;
+import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory;
 import org.finos.legend.engine.persistence.components.relational.api.IngestorResult;
 import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection;
 import org.finos.legend.engine.persistence.components.relational.sql.TabularData;
 import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen;
 import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils;
 import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor;
+import org.finos.legend.engine.persistence.components.transformer.Transformer;
 import org.finos.legend.engine.persistence.components.util.Capability;
 import org.finos.legend.engine.persistence.components.util.PlaceholderValue;
+import org.finos.legend.engine.persistence.components.util.ValidationCategory;
 
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.buildErrorRecord;
+import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUE;
 
 public class AnsiSqlSink extends RelationalSink
 {
     private static final RelationalSink INSTANCE;
     protected static final Map<Class<?>, LogicalPlanVisitor<?>> LOGICAL_PLAN_VISITOR_BY_CLASS;
 
+    // Column names used by dry-run validation results to locate the offending record
+    private static final String FILE = "legend_persistence_file";
+    private static final String ROW_NUMBER = "legend_persistence_row_number";
+
     static
     {
         Map<Class<?>, LogicalPlanVisitor<?>> logicalPlanVisitorByClass = new HashMap<>();
@@ -329,4 +339,133 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor
 
+    public List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, List<org.eclipse.collections.api.tuple.Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion)
+    {
+        throw new UnsupportedOperationException("DryRun not supported!");
+    }
+
+    protected Optional<String> getString(Map<String, Object> row, String key)
+    {
+        Object value = row.get(key);
+        String strValue = value == null ? null : (String) value;
+        return Optional.ofNullable(strValue);
+    }
+
+    protected Optional<Long> getLong(Map<String, Object> row, String key)
+    {
+        Object value = row.get(key);
+        Long longValue = value == null ? null : (Long) value;
+        return Optional.ofNullable(longValue);
+    }
+
+    protected Optional<Character> getChar(Map<String, Object> row, String key)
+    {
+        Object value = row.get(key);
+        if (value instanceof Character)
+        {
+            return Optional.of((Character) value);
+        }
+        if (value instanceof String)
+        {
+            // Guard against empty strings before taking the first character
+            return getString(row, key).filter(s -> !s.isEmpty()).map(s -> s.charAt(0));
+        }
+        return Optional.empty();
+    }
+
+    protected int findNullValuesDataErrors(Executor<SqlGen, TabularData, SqlPlan> executor, List<org.eclipse.collections.api.tuple.Pair<Set<FieldValue>, SqlPlan>> queriesForNull, Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory, List<String> allFields, CaseConversion caseConversion)
+    {
+        int errorsCount = 0;
+        for (org.eclipse.collections.api.tuple.Pair<Set<FieldValue>, SqlPlan> pair : queriesForNull)
+        {
+            List<TabularData> results = executor.executePhysicalPlanAndGetResults(pair.getTwo());
+            if (!results.isEmpty())
+            {
+                List<Map<String, Object>> resultSets = results.get(0).getData();
+                for (Map<String, Object> row : resultSets)
+                {
+                    // A row fails the null-value check if any of the validated columns is null
+                    for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet()))
+                    {
+                        if (row.get(column) == null)
+                        {
+                            DataError dataError = constructDataError(allFields, row, NULL_VALUE, column, caseConversion);
+                            dataErrorsByCategory.get(NULL_VALUE).add(dataError);
+                            errorsCount++;
+                        }
+                    }
+                }
+            }
+        }
+        return errorsCount;
+    }
+
+    protected DataError constructDataError(List<String> allColumns, Map<String, Object> row, ValidationCategory validationCategory, String validatedColumnName, CaseConversion caseConversion)
+    {
+        ErrorCategory errorCategory = getValidationFailedErrorCategory(validationCategory);
+        String fileColumnName = ApiUtils.convertCase(caseConversion, FILE);
+        String rowNumberColumnName = ApiUtils.convertCase(caseConversion, ROW_NUMBER);
+        Map<String, Object> errorDetails = buildErrorDetails(getString(row, fileColumnName), Optional.of(validatedColumnName), getLong(row, rowNumberColumnName));
+
+        return DataError.builder()
+            .errorMessage(errorCategory.getDefaultErrorMessage())
+            .errorCategory(errorCategory)
+            .putAllErrorDetails(errorDetails)
+            .errorRecord(buildErrorRecord(allColumns, row))
+            .build();
+    }
+
+    protected Map<String, Object> buildErrorDetails(Optional<String> fileName, Optional<String> columnName, Optional<Long> recordNumber)
+    {
+        Map<String, Object> errorDetails = new HashMap<>();
+        fileName.ifPresent(file -> errorDetails.put(DataError.FILE_NAME, file));
+        columnName.ifPresent(col -> errorDetails.put(DataError.COLUMN_NAME, col));
+        recordNumber.ifPresent(rowNum -> errorDetails.put(DataError.RECORD_NUMBER, rowNum));
+        return errorDetails;
+    }
+
+    private ErrorCategory getValidationFailedErrorCategory(ValidationCategory validationCategory)
+    {
+        switch (validationCategory)
+        {
+            case NULL_VALUE:
+                return ErrorCategory.CHECK_NULL_CONSTRAINT;
+            case TYPE_CONVERSION:
+                return ErrorCategory.TYPE_CONVERSION;
+            default:
+                throw new IllegalStateException("Unsupported validation category");
+        }
+    }
+
+    public List<DataError> getDataErrorsWithFairDistributionAcrossCategories(int sampleRowCount, int dataErrorsTotalCount, Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory)
+    {
+        // If everything fits within the sample budget, return all errors from all categories
+        if (dataErrorsTotalCount <= sampleRowCount)
+        {
+            return dataErrorsByCategory.values().stream().flatMap(Collection::stream).collect(Collectors.toList());
+        }
+
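+        // Round-robin across the categories: each pass polls at most one error from every
+        // non-empty category queue, so the sample is spread fairly instead of being drawn
+        // from a single category. For example, with sampleRowCount = 3 and queues of sizes
+        // {NULL_VALUE: 5, TYPE_CONVERSION: 1}, the result holds 2 null-value errors and
+        // 1 type-conversion error.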
+        List<DataError> fairlyDistributedDataErrors = new ArrayList<>();
+        List<ValidationCategory> eligibleCategories = new ArrayList<>(Arrays.asList(ValidationCategory.values()));
+
+        while (fairlyDistributedDataErrors.size() < sampleRowCount && !eligibleCategories.isEmpty())
+        {
+            // Iterate with an explicit Iterator so drained categories can be removed
+            // without triggering a ConcurrentModificationException
+            Iterator<ValidationCategory> categoryIterator = eligibleCategories.iterator();
+            while (categoryIterator.hasNext() && fairlyDistributedDataErrors.size() < sampleRowCount)
+            {
+                ValidationCategory validationCategory = categoryIterator.next();
+                Queue<DataError> categoryErrors = dataErrorsByCategory.get(validationCategory);
+                if (!categoryErrors.isEmpty())
+                {
+                    fairlyDistributedDataErrors.add(categoryErrors.poll());
+                }
+                else
+                {
+                    categoryIterator.remove();
+                }
+            }
+        }
+
+        return fairlyDistributedDataErrors;
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java
index 5e574ac08de..a7bbf116d29 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java
@@ -41,8 +41,8 @@ public VisitorResult visit(PhysicalPlanNode prev, Selection current, VisitorCont
         prev.push(selectStatement);
 
         List<LogicalPlanNode> logicalPlanNodeList = new ArrayList<>();
-        List<Condition> conditions = new ArrayList<>();
-        current.condition().ifPresent(conditions::add);
+        List<Condition> whereConditions = new ArrayList<>();
+        current.condition().ifPresent(whereConditions::add);
 
         if (current.source().isPresent())
         {
@@ -57,14 +57,14 @@ select id from (select * from table where condition)
             {
                 DerivedDataset derivedDataset = (DerivedDataset) dataset;
                 Condition filterCondition = LogicalPlanUtils.getDatasetFilterCondition(derivedDataset);
-                conditions.add(filterCondition);
+                whereConditions.add(filterCondition);
                 logicalPlanNodeList.add(derivedDataset.datasetReference());
             }
             else if (dataset instanceof FilteredDataset)
             {
                 FilteredDataset filteredDataset = (FilteredDataset) dataset;
                 Condition filterCondition = filteredDataset.filter();
-                conditions.add(filterCondition);
+                whereConditions.add(filterCondition);
                 logicalPlanNodeList.add(filteredDataset.datasetReference());
             }
             else
@@ -89,12 +89,14 @@ else if (dataset instanceof FilteredDataset)
             selectStatement.setLimit(current.limit().get());
         }
 
-        if (!conditions.isEmpty())
+        if (!whereConditions.isEmpty())
         {
-            logicalPlanNodeList.add(And.of(conditions));
+            selectStatement.setHasWhereCondition(true);
+            logicalPlanNodeList.add(And.of(whereConditions));
         }
 
         current.groupByFields().ifPresent(logicalPlanNodeList::addAll);
+        current.havingCondition().ifPresent(logicalPlanNodeList::add);
         current.quantifier().ifPresent(logicalPlanNodeList::add);
 
         return new VisitorResult(selectStatement, logicalPlanNodeList);
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java index 7131c44d12b..36297acf02e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java @@ -221,21 +221,21 @@ public static String getDropTempTableQuery(String tableName) "\"batch_id\" INTEGER," + "PRIMARY KEY (\"id\", \"name\", \"batch_update_time\"))"; - public static String expectedBaseTempStagingTablePlusDigest = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTablePlusDigest = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + "\"biz_date\" DATE," + "\"digest\" VARCHAR)"; - public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + "\"biz_date\" DATE," + "\"legend_persistence_count\" INTEGER)"; - public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -244,7 +244,7 @@ public static String getDropTempTableQuery(String tableName) "\"version\" INTEGER," + "\"legend_persistence_count\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -252,7 +252,7 @@ public static String getDropTempTableQuery(String tableName) "\"digest\" VARCHAR," + "\"legend_persistence_count\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithCountUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\"" + + public static String expectedBaseTempStagingTablePlusDigestWithCountUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\"" + "(\"ID\" INTEGER NOT NULL," + "\"NAME\" VARCHAR NOT NULL," + "\"AMOUNT\" DOUBLE," + @@ -260,7 +260,7 @@ public static String getDropTempTableQuery(String tableName) "\"DIGEST\" VARCHAR," + "\"LEGEND_PERSISTENCE_COUNT\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithVersionUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\"" + + public static String 
expectedBaseTempStagingTablePlusDigestWithVersionUpperCase = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\"" + "(\"ID\" INTEGER NOT NULL," + "\"NAME\" VARCHAR NOT NULL," + "\"AMOUNT\" DOUBLE," + @@ -268,7 +268,7 @@ public static String getDropTempTableQuery(String tableName) "\"DIGEST\" VARCHAR," + "\"VERSION\" INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"(" + + public static String expectedBaseTempStagingTablePlusDigestWithDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"(" + "\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -276,7 +276,7 @@ public static String getDropTempTableQuery(String tableName) "\"digest\" VARCHAR," + "\"data_split\" INTEGER NOT NULL)"; - public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -285,7 +285,7 @@ public static String getDropTempTableQuery(String tableName) "\"legend_persistence_count\" INTEGER," + "\"data_split\" INTEGER NOT NULL)"; - public static String expectedBaseTempStagingTablePlusDigestWithDataSplitAndCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTablePlusDigestWithDataSplitAndCount = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -494,8 +494,8 @@ public static String getDropTempTableQuery(String tableName) "PRIMARY KEY (\"id\", \"name\", \"validity_from_reference\"))"; public static String expectedStagingCleanupQuery = "DELETE FROM \"mydb\".\"staging\" as stage"; - public static String expectedTempStagingCleanupQuery = "DELETE FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage"; - public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage"; + public static String expectedTempStagingCleanupQuery = "DELETE FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage"; + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage"; public static String expectedDropTableQuery = "DROP TABLE IF EXISTS \"mydb\".\"staging\" CASCADE"; public static String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata " + @@ -535,7 +535,7 @@ public static String getDropTempTableQuery(String tableName) "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'SUCCEEDED'," + "PARSE_JSON('{\"watermark\":\"my_watermark_value\"}'))"; - public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"legend_persistence_count\") " + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER " + @@ -544,13 +544,13 @@ public static String getDropTempTableQuery(String tableName) "\"mydb\".\"staging\" as stage GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\") as stage) as stage " + "WHERE stage.\"legend_persistence_rank\" = 1)"; - public static String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\")"; - public static String expectedInsertIntoBaseTempStagingWithFilterDupsAndMaxVersion = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingWithFilterDupsAndMaxVersion = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as " + "\"legend_persistence_count\" FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + @@ -559,13 +559,13 @@ public static String getDropTempTableQuery(String tableName) "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\") as stage) as stage WHERE stage.\"legend_persistence_rank\" = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\")"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER " + @@ -574,7 +574,7 @@ public static String getDropTempTableQuery(String tableName) "\"mydb\".\"staging\" as stage GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\") as stage) as stage " + "WHERE stage.\"legend_persistence_rank\" = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",DENSE_RANK() " + @@ -582,7 +582,7 @@ public static String getDropTempTableQuery(String tableName) "FROM \"mydb\".\"staging\" as stage) as stage WHERE stage.\"legend_persistence_rank\" = 1)"; public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicatesUpperCase = "INSERT INTO " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"LEGEND_PERSISTENCE_COUNT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"LEGEND_PERSISTENCE_COUNT\" as \"LEGEND_PERSISTENCE_COUNT\" FROM " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"LEGEND_PERSISTENCE_COUNT\" as \"LEGEND_PERSISTENCE_COUNT\",DENSE_RANK() " + @@ -590,34 +590,50 @@ public static String getDropTempTableQuery(String tableName) "FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",COUNT(*) as \"LEGEND_PERSISTENCE_COUNT\" FROM \"MYDB\".\"STAGING\" as stage " + "GROUP BY stage.\"ID\", stage.\"NAME\", stage.\"AMOUNT\", stage.\"BIZ_DATE\", stage.\"DIGEST\") as stage) as stage WHERE stage.\"LEGEND_PERSISTENCE_RANK\" = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"legend_persistence_count\", \"data_split\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"biz_date\" ASC) as \"data_split\" " + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\") as stage)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndAllowDups = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String 
expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndAllowDups = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"data_split\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"biz_date\" ASC) as \"data_split\" " + "FROM \"mydb\".\"staging\" as stage)"; public static String maxDupsErrorCheckSql = "SELECT MAX(stage.\"legend_persistence_count\") as \"MAX_DUPLICATES\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage"; + + public static String dupRowsSql = "SELECT \"id\",\"name\",\"legend_persistence_count\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + + "WHERE stage.\"legend_persistence_count\" > 1 LIMIT 20"; public static String dataErrorCheckSqlWithBizDateVersion = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " + "(SELECT COUNT(DISTINCT(\"digest\")) as \"legend_persistence_distinct_rows\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; public static String dataErrorCheckSql = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " + "(SELECT COUNT(DISTINCT(\"digest\")) as \"legend_persistence_distinct_rows\" FROM " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage GROUP BY \"id\", \"name\", \"version\") as stage"; + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"version\") as stage"; public static String dataErrorCheckSqlUpperCase = "SELECT MAX(\"LEGEND_PERSISTENCE_DISTINCT_ROWS\") as \"MAX_DATA_ERRORS\" FROM" + " (SELECT COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_DISTINCT_ROWS\" FROM " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage GROUP BY \"ID\", \"NAME\", \"VERSION\") as stage"; + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage GROUP BY \"ID\", \"NAME\", \"VERSION\") as stage"; public static String dataErrorCheckSqlWithBizDateAsVersionUpperCase = "SELECT MAX(\"LEGEND_PERSISTENCE_DISTINCT_ROWS\") as \"MAX_DATA_ERRORS\" " + "FROM (SELECT COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_DISTINCT_ROWS\" FROM " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\") as stage"; + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\") as stage"; + + public static String dataErrorsSqlWithBizDateVersion = "SELECT \"id\",\"name\",\"biz_date\",COUNT(DISTINCT(\"digest\")) as \"legend_persistence_error_count\" FROM " + + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT \"ID\",\"NAME\",\"BIZ_DATE\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + + "as stage GROUP BY \"ID\", \"NAME\", \"BIZ_DATE\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 20"; + + public static String dataErrorsSqlUpperCase = "SELECT \"ID\",\"NAME\",\"VERSION\",COUNT(DISTINCT(\"DIGEST\")) as \"LEGEND_PERSISTENCE_ERROR_COUNT\" FROM 
\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + + "as stage GROUP BY \"ID\", \"NAME\", \"VERSION\" HAVING \"LEGEND_PERSISTENCE_ERROR_COUNT\" > 1 LIMIT 10"; + + public static String dataErrorsSql = "SELECT \"id\",\"name\",\"version\",COUNT(DISTINCT(\"digest\")) as \"legend_persistence_error_count\" FROM " + + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"version\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 535aaeb2d4a..f37d536e0eb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -76,7 +76,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); @@ -93,7 +93,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), generatorResults.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -112,7 +112,7 @@ public void 
verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterEx List deduplicationAndVersioningSql = queries.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -146,7 +146,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_number\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + @@ -166,7 +166,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithAdditionalMetadata(), operations.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_number\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -185,7 +185,7 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", 
\"BATCH_UPDATE_TIME\", \"BATCH_ID\") " + - "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (EXISTS " + "(SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" = stage.\"DIGEST\"))))"; @@ -200,7 +200,7 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -217,7 +217,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExis List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -250,7 +250,7 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + - "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSqlList.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index 66ca6b07e82..ef0ddbd563d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; @@ -29,18 +29,17 @@ import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DUPLICATES; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*; public class NontemporalDeltaTest extends NontemporalDeltaTestCases { protected String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; protected String incomingRecordCountWithSplits = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE " + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - protected String incomingRecordCountWithSplitsTempStagingTable = "SELECT COUNT(*) as \"incomingRecordCount\" FROM 
\"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + protected String incomingRecordCountWithSplitsTempStagingTable = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE " + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String incomingRecordCountWithSplitsWithDuplicates = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE " + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; @@ -56,7 +55,7 @@ public void verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(GeneratorResult List initializeLockSql = operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + @@ -104,23 +103,23 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe List milestoningSqlList = operations.ingestSql(); List metadataIngestSqlList = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "SET sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND 
(sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + "sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000'," + "sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))"; + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; @@ -144,19 +143,19 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) { String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND 
((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + "sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE " + "(((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) " + "AND ((stage.\"data_split\" >= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id\") " + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -170,6 +169,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) { String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= 
'{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + "sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000'," + "sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000'," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -250,7 +250,11 @@ public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"name\" = (SELECT 
stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + - "sink.\"version\" = (SELECT stage.\"version\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"version\" = (SELECT stage.\"version\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + "sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))"; + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') FROM " + - 
"\"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" " + @@ -475,6 +479,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972"; // Stats @@ -560,18 +565,18 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE \"MYDB\".\"MAIN\" as sink " + - "SET sink.\"ID\" = (SELECT stage.\"ID\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"NAME\" = (SELECT stage.\"NAME\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"AMOUNT\" = (SELECT stage.\"AMOUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"BIZ_DATE\" = (SELECT stage.\"BIZ_DATE\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"DIGEST\" = (SELECT stage.\"DIGEST\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + - "sink.\"VERSION\" = (SELECT stage.\"VERSION\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "SET sink.\"ID\" = (SELECT stage.\"ID\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"NAME\" = (SELECT stage.\"NAME\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= 
sink.\"VERSION\"))," + + "sink.\"AMOUNT\" = (SELECT stage.\"AMOUNT\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"BIZ_DATE\" = (SELECT stage.\"BIZ_DATE\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"DIGEST\" = (SELECT stage.\"DIGEST\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"VERSION\" = (SELECT stage.\"VERSION\" FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + "sink.\"BATCH_ID\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') " + - "WHERE EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))"; + "WHERE EXISTS (SELECT * FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))"; String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\", \"BATCH_ID\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + - "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN') FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\"))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase, preActionsSqlList.get(0)); @@ -581,7 +586,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithUpperCase(), operations.metadataIngestSql().get(0)); - String insertTempStagingTable = "INSERT INTO \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + String insertTempStagingTable = "INSERT INTO \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" FROM " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + @@ -592,6 +597,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat Assertions.assertEquals(insertTempStagingTable, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSqlUpperCase, 
operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index d3d3ef3e843..10c44d84bc7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -29,6 +29,8 @@ import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { @@ -48,7 +50,7 @@ public void verifyNontemporalSnapshotNoAuditingNoDedupNoVersioning(GeneratorResult operations) List<String> initializeLockSql = operations.initializeLockSql(); List<String> acquireLockSql = operations.acquireLockSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + @@ -79,13 +81,13 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(GeneratorResult operations) List<String> milestoningSqlList = operations.ingestSql(); List<String> metadataIngestSqlList = operations.metadataIngestSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", 
\"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSqlList.get(1)); @@ -108,18 +110,21 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorResult operations) List<String> milestoningSqlList = operations.ingestSql(); List<String> metadataIngestSqlList = operations.metadataIngestSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\", \"batch_id\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage)"; String maxDataErrorCheckSql = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " + - "(SELECT COUNT(DISTINCT(\"amount\")) as \"legend_persistence_distinct_rows\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(SELECT COUNT(DISTINCT(\"amount\")) as \"legend_persistence_distinct_rows\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; + String dataErrorsSql = "SELECT \"id\",\"name\",\"biz_date\",COUNT(DISTINCT(\"amount\")) as \"legend_persistence_error_count\" FROM " + + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage GROUP BY \"id\", \"name\", \"biz_date\" HAVING \"legend_persistence_error_count\" > 1 LIMIT 20"; + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSqlList.get(1)); Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTableWithCount, preActionsSqlList.get(2)); @@ -129,8 +134,10 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorResult operations) Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + 
Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(dataErrorsSql, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); // Stats verifyStats(operations, "staging"); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java index ad272ce5473..efd8c2e04e2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; @@ -27,7 +27,7 @@ import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*; public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @@ -128,13 +128,13 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResult operations) List<String> milestoningSql = operations.ingestSql(); List<String> metadataIngestSql = operations.metadataIngestSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET sink.\"batch_id_out\" = " + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE " + "(sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND 
((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -142,7 +142,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "999999999 FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))) AND " + @@ -154,7 +154,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); - String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"delete_indicator\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"delete_indicator\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + @@ -179,7 +179,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -187,7 +187,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + @@ -205,7 +205,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE 
UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as \"rowsInserted\""; @@ -337,14 +338,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 " + "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "as stage WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -352,7 +353,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + @@ -367,6 +368,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); 
Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -383,14 +385,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 " + "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "as stage WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -398,7 +400,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + @@ -413,6 +415,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); 
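Note the pairing that repeats across these tests: each scalar check now has a row-sample counterpart (MAX_DUPLICATES with DUPLICATE_ROWS, MAX_DATA_ERRORS with DATA_ERROR_ROWS). A hypothetical shared helper, not part of this change set, could assert that pairing once per generated plan instead of once per test method:

    import java.util.Map;

    import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType;
    import org.junit.jupiter.api.Assertions;

    public final class ErrorCheckPairAssertions
    {
        // If a scalar error-check query was generated, its row-level sample
        // query should have been generated alongside it, and vice versa.
        public static void assertScalarAndRowChecksArePaired(Map<DedupAndVersionErrorSqlType, String> checks)
        {
            Assertions.assertEquals(checks.containsKey(DedupAndVersionErrorSqlType.MAX_DUPLICATES),
                    checks.containsKey(DedupAndVersionErrorSqlType.DUPLICATE_ROWS));
            Assertions.assertEquals(checks.containsKey(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS),
                    checks.containsKey(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS));
        }

        private ErrorCheckPairAssertions()
        {
        }
    }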
Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -466,7 +469,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(stage.\"version\" > sink.\"version\")))"; @@ -475,7 +478,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + "999999999 " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -484,7 +487,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage GROUP BY stage.\"id\", " + @@ -492,7 +495,8 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithFilterDuplicates, operations.deduplicationAndVersioningSql().get(1)); - Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(dupRowsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -508,14 +512,14 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta String 
expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink " + "SET sink.\"BATCH_ID_OUT\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1 WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE " + + "(EXISTS (SELECT * FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage WHERE " + "((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")))"; String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999 FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999 FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND " + "(stage.\"VERSION\" < sink.\"VERSION\") AND ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")))))"; @@ -523,7 +527,7 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithVersionUpperCase, preActionsSql.get(2)); - String expectedInsertIntoTempStagingMaxVersion = "INSERT INTO \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + String expectedInsertIntoTempStagingMaxVersion = "INSERT INTO \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" " + "FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + @@ -533,6 +537,7 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta Assertions.assertEquals(expectedTempStagingCleanupQueryInUpperCase, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoTempStagingMaxVersion, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(dataErrorCheckSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 55890efccd2..68fc66e0996 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -26,8 +26,8 @@ import java.util.List; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DUPLICATES; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; public class UnitemporalDeltaBatchIdDateTimeBasedTest extends UnitmemporalDeltaBatchIdDateTimeBasedTestCases { @@ -79,7 +79,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"digest\" <> stage.\"digest\")))"; @@ -88,7 +88,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + @@ -106,7 +106,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(1).metadataIngestSql().get(0)); Assertions.assertEquals(2, operations.size()); - String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String 
expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"data_split\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"data_split\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + @@ -116,7 +116,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithFilterDuplicates, operations.get(0).deduplicationAndVersioningSql().get(1)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) as \"rowsInserted\""; @@ -210,7 +210,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -218,7 +218,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(sink.\"digest\" = stage.\"digest\") AND ((sink.\"id\" = stage.\"id\") AND " + @@ -227,7 +227,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"digest\" <> stage.\"digest\")))"; @@ -86,7 +86,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + @@ -104,7 +104,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(1).metadataIngestSql().get(0)); Assertions.assertEquals(2, operations.size()); - String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"data_split\", \"legend_persistence_count\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"data_split\"," + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + @@ -113,9 +113,10 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.get(0).deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithFilterDuplicates, operations.get(0).deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(maxDupsErrorCheckSql, operations.get(0).deduplicationAndVersioningErrorChecksSql().get(MAX_DUPLICATES)); + Assertions.assertEquals(dupRowsSql, operations.get(0).deduplicationAndVersioningErrorChecksSql().get(DUPLICATE_ROWS)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000'"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') as \"rowsInserted\""; @@ -171,7 +172,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -179,7 +180,7 @@ public void 
verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND (sink.\"digest\" = stage.\"digest\") " + @@ -189,7 +190,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsInserted\""; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java index b79aeb6c903..9ce111bf20f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -27,6 +27,8 @@ import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; import static 
org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.dupRowsSql; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBatchIdBasedTestCases { @@ -45,7 +47,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) List<String> initializeLockSql = operations.initializeLockSql(); List<String> acquireLockSql = operations.acquireLockSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -86,19 +88,19 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(GeneratorResult operations) List<String> initializeLockSql = operations.initializeLockSql(); List<String> acquireLockSql = operations.acquireLockSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); @@ -109,7 +111,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(GeneratorResult operations) Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); - 
Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index e4b03057f4c..b345447dea8 100--- --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -25,6 +25,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; + public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSnapshotBatchIdDateTimeBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; @@ -70,19 +73,19 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(GeneratorResult operations) List<String> milestoningSql = operations.ingestSql(); List<String> metadataIngestSql = operations.metadataIngestSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + 
"WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -94,7 +97,9 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(GeneratorResult operations) Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -120,18 +125,18 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte List<String> milestoningSql = operations.ingestSql(); List<String> metadataIngestSql = operations.metadataIngestSql(); List<String> deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = " + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE " + "UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1,sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00.000000' " + - "WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (NOT (EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (NOT (EXISTS (SELECT * FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND 
(sink.\"DIGEST\" = stage.\"DIGEST\"))))"; String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\"," + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "WHERE NOT (stage.\"DIGEST\" IN (SELECT sink.\"DIGEST\" FROM \"MYDB\".\"MAIN\" as sink WHERE sink.\"BATCH_ID_OUT\" = 999999999)))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); @@ -139,7 +144,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateAsVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateAsVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java index 4db9bb8ae4d..258bee99dfa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import 
org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -25,6 +25,11 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.dataErrorsSqlWithBizDateVersion; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.dupRowsSql; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { @@ -41,7 +46,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorR List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + @@ -75,20 +80,20 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + "AND (NOT (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "(SELECT * FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '9999-12-31 23:59:59')))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); @@ -98,8 +103,10 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); 
Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingWithFilterDupsAndMaxVersion, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1));
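The assertions above pin a new shape for deduplicationAndVersioningErrorChecksSql(): alongside the cheap aggregate checks (MAX_DUPLICATES, MAX_DATA_ERRORS), the map now also carries row-sampling queries (DUPLICATE_ROWS, DATA_ERROR_ROWS) whose generated SQL is bounded by a LIMIT (see the dupRowsSql / dataErrorsSql artifacts further down). A minimal sketch of how a caller could consume such a pair — SqlRunner and both of its methods are illustrative stand-ins, not the engine's actual executor API:

import java.util.List;
import java.util.Map;
import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType;

public class DedupErrorCheckSketch
{
    // Hypothetical runner; the real engine executes these queries through its own Executor.
    public interface SqlRunner
    {
        long scalar(String sql);                     // single aggregate value
        List<Map<String, Object>> rows(String sql);  // sampled rows
    }

    public static void failOnDuplicates(Map<DedupAndVersionErrorSqlType, String> checks, SqlRunner runner)
    {
        // Run the cheap aggregate first; fetch the row sample only when it breaches.
        if (runner.scalar(checks.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)) > 1)
        {
            // The sample is already capped by the LIMIT in the generated SQL,
            // so it is safe to embed in an error message.
            List<Map<String, Object>> sample = runner.rows(checks.get(DedupAndVersionErrorSqlType.DUPLICATE_ROWS));
            throw new IllegalStateException("Duplicate rows in staging data; sample: " + sample);
        }
    }
}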
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java new file mode 100644 index 00000000000..c8efa1ff460 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DataErrorFairDistributionTest.java @@ -0,0 +1,122 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.stream.Collectors; + +public class DataErrorFairDistributionTest +{ + @Test + public void testTotalErrorsSmallerThanSampleRowCount() + { + AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); + + Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory = new HashMap<>(); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUE, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.TYPE_CONVERSION, new LinkedList<>()); + List<DataError> expectedNullValuesErrors = new ArrayList<>(); + List<DataError> expectedDatatypeErrors = new ArrayList<>(); + + populateDataErrors(ValidationCategory.NULL_VALUE, ErrorCategory.CHECK_NULL_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.TYPE_CONVERSION, ErrorCategory.TYPE_CONVERSION, 5, 5, dataErrorsByCategory, expectedDatatypeErrors); + + List<DataError> results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 10, dataErrorsByCategory); + Assertions.assertEquals(10, results.size()); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT)).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION)).collect(Collectors.toList())); + } + + @Test + public void testExhaustingOneCategory() + { + AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); + + Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory = new HashMap<>(); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUE, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.TYPE_CONVERSION, new LinkedList<>()); + List<DataError> expectedNullValuesErrors = new ArrayList<>(); + List<DataError> expectedDatatypeErrors = new ArrayList<>(); + + populateDataErrors(ValidationCategory.NULL_VALUE, ErrorCategory.CHECK_NULL_CONSTRAINT, 5, 5, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.TYPE_CONVERSION, ErrorCategory.TYPE_CONVERSION, 50, 15, dataErrorsByCategory, expectedDatatypeErrors); + + List<DataError> results = sink.getDataErrorsWithFairDistributionAcrossCategories(20, 55, dataErrorsByCategory); + Assertions.assertEquals(20, results.size()); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT)).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION)).collect(Collectors.toList())); + } + + @Test + public void testExhaustingBothCategories() + { + AnsiSqlSink sink = (AnsiSqlSink) AnsiSqlSink.get(); + + Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory = new HashMap<>(); + dataErrorsByCategory.put(ValidationCategory.NULL_VALUE, new LinkedList<>()); + dataErrorsByCategory.put(ValidationCategory.TYPE_CONVERSION, new LinkedList<>()); + List<DataError> expectedNullValuesErrors = new ArrayList<>(); + List<DataError> expectedDatatypeErrors = new ArrayList<>(); + + populateDataErrors(ValidationCategory.NULL_VALUE, ErrorCategory.CHECK_NULL_CONSTRAINT, 15, 10, dataErrorsByCategory, expectedNullValuesErrors); + populateDataErrors(ValidationCategory.TYPE_CONVERSION, ErrorCategory.TYPE_CONVERSION, 20, 9, dataErrorsByCategory, expectedDatatypeErrors); + + List<DataError> results = sink.getDataErrorsWithFairDistributionAcrossCategories(19, 35, dataErrorsByCategory); + Assertions.assertEquals(19, results.size()); + Assertions.assertEquals(expectedNullValuesErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.CHECK_NULL_CONSTRAINT)).collect(Collectors.toList())); + Assertions.assertEquals(expectedDatatypeErrors, results.stream().filter(error -> error.errorCategory().equals(ErrorCategory.TYPE_CONVERSION)).collect(Collectors.toList())); + } + + private void populateDataErrors(ValidationCategory validationCategory, ErrorCategory errorCategory, int totalCount, int expectedCount, Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory, List<DataError> expectedList) + { + int count = 1; + while (count <= totalCount) + { + DataError dataError = getDummyDataError(errorCategory, count); + dataErrorsByCategory.get(validationCategory).add(dataError); + if (count <= expectedCount) + { + expectedList.add(dataError); + } + count++; + } + } + + private DataError getDummyDataError(ErrorCategory category, long rowNumber) + { + Map<String, Object> errorDetails = new HashMap<>(); + errorDetails.put(DataError.FILE_NAME, "some_file_name"); + errorDetails.put(DataError.RECORD_NUMBER, rowNumber); + errorDetails.put(DataError.COLUMN_NAME, "some_column_name"); + + return DataError.builder() + .errorCategory(category) + .putAllErrorDetails(errorDetails) + .errorRecord("some_data") + .errorMessage("some_error_message") + .build(); + } +}
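The three expected splits above (5+5 of 10 under a cap of 20, then 5+15 under a cap of 20, then 10+9 under a cap of 19) are consistent with a round-robin draw: take one error from each category's queue in turn, preserving per-category order, until the sample cap is reached or every queue is drained. The sketch below reproduces those counts under that reading; it is an inference from the test data, not the method's actual implementation (which also receives the total error count, presumably to short-circuit when everything fits):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;

public final class FairDistributionSketch
{
    // Assumes a deterministic category iteration order: the 10-vs-9 split in
    // testExhaustingBothCategories implies one category is consistently drawn first.
    public static <C, E> List<E> fairSample(int sampleRowCount, Map<C, Queue<E>> errorsByCategory)
    {
        List<E> result = new ArrayList<>();
        boolean drained = false;
        while (result.size() < sampleRowCount && !drained)
        {
            drained = true;
            for (Queue<E> queue : errorsByCategory.values())
            {
                if (result.size() >= sampleRowCount)
                {
                    break;
                }
                E next = queue.poll();  // next error of this category, in original order
                if (next != null)
                {
                    result.add(next);
                    drained = false;
                }
            }
        }
        return result;
    }
}

With queues of 15 and 20 and a cap of 19, this alternation yields exactly 10 from the first category and 9 from the second, matching testExhaustingBothCategories.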
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtilsTest.java new file mode 100644 index 00000000000..3ff7f0ede46 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/TableNameGenUtilsTest.java @@ -0,0 +1,30 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TableNameGenUtilsTest +{ + @Test + public void testTableNameGen() + { + String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; + String expectedTableName = "person_temp_lp_yosulf"; + String tableName = TableNameGenUtils.generateTableName("person", "temp", ingestRunId); + Assertions.assertEquals(expectedTableName, tableName); + } +}
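The expectation above pins the shape of the generated name — base table, qualifier, an "lp" marker, and a short lowercase token that is a pure function of the ingest run id (the same UUID yields "yosulf" for every temp table in these tests). The derivation itself is not visible in this diff; the sketch below is a made-up stand-in that only illustrates the contract: deterministic per run id, so retries of one run resolve to the same temp table while different runs get distinct names.

// Illustrative only — TableNameGenUtils' real token derivation is not shown in
// this diff; encodeToken here is a hypothetical hash-to-letters mapping.
public final class TableNameSketch
{
    public static String generateTableName(String baseName, String qualifier, String ingestRunId)
    {
        return baseName + "_" + qualifier + "_lp_" + encodeToken(ingestRunId, 6);
    }

    private static String encodeToken(String seed, int length)
    {
        StringBuilder token = new StringBuilder(length);
        int h = seed.hashCode();
        for (int i = 0; i < length; i++)
        {
            token.append((char) ('a' + Math.floorMod(h, 26)));  // stable lowercase letter
            h = Integer.rotateLeft(h, 7) ^ 0x9E3779B9;          // mix for the next letter
        }
        return token.toString();
    }
}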
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java index e6bfa481967..bd89b59e866 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java @@ -93,6 +93,12 @@ public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan) return executePhysicalPlanAndGetResults(physicalPlan, new HashMap<>()); } + @Override + public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan, int rows) + { + throw new RuntimeException("Not implemented for Big Query"); + } + @Override public List<TabularData> executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map placeholderKeyValues) {
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java index 2a2b39d71b7..be90222d9db 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java @@ -368,6 +368,12 @@ public void executeStatements(List<String> sqls) } } + @Override + public List<Map<String, Object>> executeQuery(String sql, int rows) + { + throw new RuntimeException("Not implemented for Big Query"); + } + public void executeStatementsInANewTransaction(List<String> sqls) { BigQueryTransactionManager txManager = null;
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index 92d9869b210..54136d68f7e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -74,7 +74,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); @@ -91,7 +91,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(2, generatorResults.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -112,7 +112,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterEx String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -147,7 +147,7 @@ public void 
verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_number`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -167,7 +167,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_number` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -188,7 +188,7 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_UPDATE_TIME`, `BATCH_ID`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN') " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS " + "(SELECT * FROM `MYDB`.`MAIN` as sink WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; @@ -205,7 +205,7 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = 
stage.`digest`))))"; @@ -224,7 +224,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExis String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -259,7 +259,7 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1)); @@ -327,6 +327,7 @@ public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisting .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); @@ -340,7 +341,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,LAKEHOUSE_MD5(TO_JSON(STRUCT(stage.`name`,stage.`biz_date`))),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); @@ -357,7 +358,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableIngestQuery, generatorResults.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as 
`incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java index 41dad3358bb..27b0bc59f63 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java @@ -56,14 +56,14 @@ public class BigQueryTestArtifacts "`digest` STRING," + "PRIMARY KEY (`id`, `name`) NOT ENFORCED)"; - public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + "`biz_date` DATE," + "`legend_persistence_count` INT64)"; - public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -72,7 +72,7 @@ public class BigQueryTestArtifacts "`version` INT64," + "`legend_persistence_count` INT64)"; - public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -80,7 +80,7 @@ public class BigQueryTestArtifacts "`digest` STRING," + "`legend_persistence_count` INT64)"; - public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -89,7 +89,7 @@ public class BigQueryTestArtifacts "`legend_persistence_count` INT64," + 
"`data_split` INT64 NOT NULL)"; - public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + @@ -190,7 +190,7 @@ public class BigQueryTestArtifacts public static String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage WHERE 1 = 1"; - public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE 1 = 1"; + public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE 1 = 1"; public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging`"; @@ -489,7 +489,7 @@ public class BigQueryTestArtifacts "`delete_indicator` STRING," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`) NOT ENFORCED)"; - public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`," + @@ -498,13 +498,13 @@ public class BigQueryTestArtifacts "FROM `mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage) " + "as stage WHERE stage.`legend_persistence_rank` = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + @@ -513,31 +513,31 @@ public class BigQueryTestArtifacts "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " + "WHERE stage.`legend_persistence_rank` = 1)"; - 
public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, `data_split`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; - public static String expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`, `data_split`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage)"; public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage"; public static String dataErrorCheckSqlForBizDateAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; public static String dataErrorCheckSqlForVersionAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version`) as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version`) as stage"; - public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE 1 = 1"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE 1 = 1"; + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, 
`LEGEND_PERSISTENCE_COUNT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT` " + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + @@ -547,15 +547,27 @@ public class BigQueryTestArtifacts "FROM `MYDB`.`STAGING` as stage GROUP BY stage.`ID`, stage.`NAME`, stage.`AMOUNT`, stage.`BIZ_DATE`, stage.`DIGEST`) as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_RANK` = 1)"; public static String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + + "WHERE stage.`legend_persistence_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT `ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + + "as stage GROUP BY `ID`, `NAME`, `BIZ_DATE` HAVING `LEGEND_PERSISTENCE_ERROR_COUNT` > 1 LIMIT 20"; + + public static String dataErrorsSql = "SELECT `id`,`name`,`version`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + public static String getDropTempTableQuery(String tableName) { return String.format("DROP TABLE IF EXISTS %s", tableName); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index b2d96140351..82ab77db5c0 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -60,6 +60,7 @@ public class BulkLoadTest private static final String COL_DECIMAL = "col_decimal"; private static final String COL_DATETIME = "col_datetime"; private static final String COL_VARIANT = "col_variant"; + private static final String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; private static Field col1 = Field.builder() .name(COL_INT) @@ -117,6 +118,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) .batchIdPattern("{NEXT_BATCH_ID}") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -129,14 +131,14 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64,`append_time` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + "(SELECT 'my_name',{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"xyz123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; @@ -191,6 +193,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoEventId() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .putAllAdditionalMetadata(ADDITIONAL_METADATA) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -203,14 +206,14 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoEventId() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` 
DATETIME,`col_variant` JSON,`batch_id` INT64,`append_time` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], compression='GZIP', encoding='UTF8', field_delimiter=',', format='CSV', max_bad_records=100, null_marker='NULL', quote=''', skip_leading_rows=1)"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`, `additional_metadata`) " + "(SELECT 'my_name',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + @@ -258,6 +261,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) .putAllAdditionalMetadata(ADDITIONAL_METADATA) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -270,14 +274,14 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`) " + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME') " + - "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; String 
expectedMetaIngestSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`, `additional_metadata`) " + "(SELECT 'my_name',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME')," + @@ -325,6 +329,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -336,7 +341,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`digest` STRING,`batch_id` INT64,`append_time` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_legend_persistence_temp` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `my_db`.`my_name_temp_lp_yosulf` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC,`col_datetime` DATETIME,`col_variant` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; @@ -345,7 +350,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`," + "LAKEHOUSE_MD5(TO_JSON(STRUCT(legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`)))," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + "FROM `my_db`.`my_name_temp_lp_yosulf` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, preActionsSql.get(2)); @@ -387,6 +392,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue(EVENT_ID) .caseConversion(CaseConversion.TO_UPPER) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -398,14 +404,14 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `MY_DB`.`MY_NAME`" + "(`COL_INT` INT64,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC(5,2),`COL_DATETIME` DATETIME,`COL_VARIANT` JSON,`DIGEST` STRING,`BATCH_ID` INT64,`APPEND_TIME` DATETIME)"; - String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` " + + String expectedCopySql = "CREATE OR REPLACE EXTERNAL TABLE `MY_DB`.`MY_NAME_TEMP_LP_YOSULF` " + "(`COL_INT` INT64,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC,`COL_DATETIME` DATETIME,`COL_VARIANT` JSON) " + "OPTIONS (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], 
format='CSV')"; String expectedInsertSql = "INSERT INTO `MY_DB`.`MY_NAME` " + "(`COL_INT`, `COL_STRING`, `COL_DECIMAL`, `COL_DATETIME`, `COL_VARIANT`, `DIGEST`, `BATCH_ID`, `APPEND_TIME`) " + "(SELECT legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(STRUCT(legend_persistence_temp.`COL_VARIANT`))),(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "FROM `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` as legend_persistence_temp)"; + "FROM `MY_DB`.`MY_NAME_TEMP_LP_YOSULF` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, preActionsSql.get(2)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index 703326e518a..0c2bd02ec81 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -30,10 +30,10 @@ public class NontemporalDeltaTest extends org.finos.legend.engine.persistence.co protected String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; protected String incomingRecordCountWithSplits = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - protected String incomingRecordCountWithSplitsTempStaginTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + protected String incomingRecordCountWithSplitsTempStaginTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - protected String incomingRecordCountWithSplitsAndDuplicates = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + protected String incomingRecordCountWithSplitsAndDuplicates = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String rowsTerminated = "SELECT 0 as `rowsTerminated`"; @@ -90,7 +90,7 @@ public 
void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe List metaIngestSqlList = operations.metadataIngestSql(); String mergeSql = "MERGE INTO `mydb`.`main` as sink " + - "USING `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "USING `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + "THEN UPDATE SET " + @@ -121,7 +121,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO `mydb`.`main` as sink " + - "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + @@ -177,7 +177,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO `mydb`.`main` as sink " + - "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + @@ -364,7 +364,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen String mergeSql = "MERGE INTO `mydb`.`main` as sink " + "USING " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage " + + "`mydb`.`staging_temp_staging_lp_yosulf` as stage " + "ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND stage.`version` > sink.`version` " + "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`version` = stage.`version`," + @@ -450,7 +450,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat String mergeSql = "MERGE INTO `MYDB`.`MAIN` as sink " + "USING " + - "`MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "`MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "ON (sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`) " + "WHEN MATCHED AND stage.`VERSION` >= sink.`VERSION` " + "THEN UPDATE SET sink.`ID` = stage.`ID`,sink.`NAME` = stage.`NAME`,sink.`AMOUNT` = stage.`AMOUNT`,sink.`BIZ_DATE` = stage.`BIZ_DATE`,sink.`DIGEST` = stage.`DIGEST`,sink.`VERSION` = stage.`VERSION`," + diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java index 7b7297a272f..8bc01638fbe 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java @@ -62,7 +62,7 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); @@ -85,7 +85,7 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`," + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 44a38605f2e..b1fdeb17e82 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -14,6 +14,7 @@ package 
org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -23,7 +24,8 @@ import java.util.List; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.MAX_DATA_ERRORS; public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @@ -120,7 +122,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -128,7 +130,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -155,7 +157,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -163,7 +165,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ -181,7 +183,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List sink.`version`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 
'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` " + "as stage WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -371,7 +373,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + @@ -386,6 +388,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -401,14 +404,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + - "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` " + "as stage WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 
999999999) " + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -416,7 +419,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + @@ -431,6 +434,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -483,7 +487,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(stage.`version` > sink.`version`)))"; @@ -492,7 +496,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999 " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -515,14 +519,14 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA " + "as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1 " + "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + - "(EXISTS 
(SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "(EXISTS (SELECT * FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`)))"; String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + "(stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 9ae66a9d31d..8f22f55b79a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -74,7 +74,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -83,7 +83,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM 
`mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -101,7 +101,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) as `rowsInserted`"; @@ -195,7 +195,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -203,7 +203,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java index 3062b2d59ea..6cf1a5f832e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java @@ -71,7 +71,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -79,7 +79,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " + @@ -97,7 +97,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; @@ -153,7 +153,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN 
('yes','1','true')))))"; @@ -161,7 +161,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " + @@ -180,7 +180,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); @@ -89,7 +91,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(2)); Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(BigQueryTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index b9741ec5ca4..0da5fc07f00 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -69,19 +69,19 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (stage.`digest` IN (SELECT 
sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -89,7 +89,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -120,26 +121,27 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink " + "SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + + "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE ((sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 " + "FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999," + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink WHERE sink.`BATCH_ID_OUT` = 999999999)))"; + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink WHERE sink.`BATCH_ID_OUT` = 999999999)))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); 
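// Note on the error-check assertions in these tests: the MAX_* keys (MAX_DUPLICATES, MAX_DATA_ERRORS)
// map to aggregate count queries used to decide whether ingestion should fail, while the new
// DUPLICATE_ROWS / DATA_ERROR_ROWS keys map to queries that fetch a bounded sample of the
// offending staging rows, so a failure can be reported together with concrete example records.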
Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSqlWithBizDateVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index e51aa4cdfe3..8bffccc055b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -24,6 +24,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { @@ -70,20 +73,20 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery 
= "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " + "AND (NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59'))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); @@ -97,8 +100,10 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index 723cb7e3a7d..17e1332f023 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java
@@ -14,6 +14,7 @@
 package org.finos.legend.engine.persistence.components.relational;
 
+import org.eclipse.collections.api.tuple.Pair;
 import org.finos.legend.engine.persistence.components.common.Datasets;
 import org.finos.legend.engine.persistence.components.common.StatisticName;
 import org.finos.legend.engine.persistence.components.executor.Executor;
@@ -22,7 +23,9 @@
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType;
+import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue;
 import org.finos.legend.engine.persistence.components.optimizer.Optimizer;
+import org.finos.legend.engine.persistence.components.relational.api.DataError;
 import org.finos.legend.engine.persistence.components.relational.api.IngestorResult;
 import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection;
 import org.finos.legend.engine.persistence.components.executor.RelationalExecutionHelper;
@@ -30,13 +33,13 @@
 import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen;
 import org.finos.legend.engine.persistence.components.sink.Sink;
 import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor;
+import org.finos.legend.engine.persistence.components.transformer.Transformer;
 import org.finos.legend.engine.persistence.components.util.Capability;
 import org.finos.legend.engine.persistence.components.util.PlaceholderValue;
+import org.finos.legend.engine.persistence.components.util.ValidationCategory;
 
-import java.util.Collections;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;
+import java.util.stream.Collectors;
 
 public abstract class RelationalSink implements Sink
 {
@@ -191,4 +194,6 @@ public interface ConstructDatasetFromDatabase
     }
 
     public abstract IngestorResult performBulkLoad(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan ingestSqlPlan, Map<StatisticName, SqlPlan> statisticsSqlPlan, Map<String, PlaceholderValue> placeHolderKeyValues);
+
+    public abstract List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, List<Pair<Set<FieldValue>, org.finos.legend.engine.persistence.components.relational.SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion);
 }
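Every concrete sink now has to supply a dry-run implementation for this new abstract hook. A minimal sketch of what an implementation might look like, assuming the sink first runs the dry-run plan to stage a sample of the incoming files and then executes each validation plan; this is illustrative only, not the actual H2/Snowflake/BigQuery code, and the row-to-DataError mapping is elided:

@Override
public List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer,
                                     Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan,
                                     Map<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan,
                                     int sampleRowCount, CaseConversion caseConversion)
{
    // Stage a bounded sample of the files into the dry-run validation table.
    executor.executePhysicalPlan(dryRunSqlPlan);
    List<DataError> errors = new ArrayList<>();
    for (Map.Entry<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> entry : dryRunValidationSqlPlan.entrySet())
    {
        for (Pair<Set<FieldValue>, SqlPlan> validation : entry.getValue())
        {
            // Each plan selects up to sampleRowCount offending rows for one category of check;
            // map them to DataError here, e.g. via ApiUtils.constructDataQualityErrors(...).
            List<TabularData> results = executor.executePhysicalPlanAndGetResults(validation.getTwo());
        }
    }
    return errors;
}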
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java
index 9288024b09d..b5955b52ccb 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java
@@ -31,6 +31,7 @@
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsCaseConverter;
 import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field;
+import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue;
 import org.finos.legend.engine.persistence.components.planner.Planner;
 import org.finos.legend.engine.persistence.components.relational.CaseConversion;
 import org.finos.legend.engine.persistence.components.relational.SqlPlan;
@@ -41,6 +42,9 @@
 import org.finos.legend.engine.persistence.components.util.MetadataDataset;
 
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD;
 import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD;
@@ -231,4 +235,85 @@ public static Optional<Object> getFirstColumnValue(Map<String, Object> row)
         }
         return object;
     }
+
+    public static List<DataError> constructDataQualityErrors(Dataset stagingDataset, List<Map<String, Object>> dataErrors,
+                                                             ErrorCategory errorCategory, CaseConversion caseConversion, String errorField, String errorDetailsKey)
+    {
+        List<DataError> dataErrorList = new ArrayList<>();
+        List<String> allFields = stagingDataset.schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList());
+        String caseCorrectedErrorField = convertCase(caseConversion, errorField);
+
+        for (Map<String, Object> dataError: dataErrors)
+        {
+            dataErrorList.add(DataError.builder()
+                .errorMessage(errorCategory.getDefaultErrorMessage())
+                .errorCategory(errorCategory)
+                .errorRecord(buildErrorRecord(allFields, dataError))
+                .putAllErrorDetails(buildErrorDetails(dataError, caseCorrectedErrorField, errorDetailsKey))
+                .build());
+        }
+        return dataErrorList;
+    }
+
+    private static Map<String, Object> buildErrorDetails(Map<String, Object> dataError, String errorField, String errorDetailsKey)
+    {
+        Map<String, Object> errorDetails = new HashMap<>();
+        Object errorDetailsValue = dataError.get(errorField);
+        errorDetails.put(errorDetailsKey, errorDetailsValue);
+        return errorDetails;
+    }
+
+
+    public static String convertCase(CaseConversion caseConversion, String value)
+    {
+        switch (caseConversion)
+        {
+            case TO_UPPER:
+                return value.toUpperCase();
+            case TO_LOWER:
+                return value.toLowerCase();
+            default:
+                return value;
+        }
+    }
+
+    public static String buildErrorRecord(List<String> allColumns, Map<String, Object> row)
+    {
+        Map<String, Object> errorRecordMap = new HashMap<>();
+
+        for (String column : allColumns)
+        {
+            if (row.containsKey(column))
+            {
+                errorRecordMap.put(column, row.get(column));
+            }
+        }
+
+        ObjectMapper objectMapper = new ObjectMapper();
+        try
+        {
+            return objectMapper.writeValueAsString(errorRecordMap);
+        }
+        catch (JsonProcessingException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public static String removeLineBreaks(String str)
+    {
+        return str.replaceAll("\n", " ").replaceAll("\r", " ");
+    }
+
+    public static Optional<String> findToken(String message, String regex, int group)
+    {
+        Optional<String> token = Optional.empty();
+        Matcher matcher = Pattern.compile(regex).matcher(message);
+        if (matcher.find())
+        {
+            token = Optional.of(matcher.group(group));
+        }
+        return token;
+    }
 }
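findToken is a small regex convenience for pulling one capture group out of a loader error message. A self-contained illustration of its contract; the error text below is made up, since real messages are engine-specific:

import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FindTokenDemo
{
    // Mirrors ApiUtils.findToken: returns capture group `group` of the first match, if any.
    static Optional<String> findToken(String message, String regex, int group)
    {
        Matcher matcher = Pattern.compile(regex).matcher(message);
        return matcher.find() ? Optional.of(matcher.group(group)) : Optional.empty();
    }

    public static void main(String[] args)
    {
        // Hypothetical loader error text, flattened first with removeLineBreaks-style handling.
        String message = "Error while reading data: CSV row rejected; line_number: 42";
        System.out.println(findToken(message, "line_number: (\\d+)", 1)); // prints Optional[42]
        System.out.println(findToken(message, "column_name: (\\w+)", 1)); // prints Optional.empty
    }
}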
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java new file mode 100644 index 00000000000..69620c2f6ae --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DataErrorAbstract.java @@ -0,0 +1,48 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.api; + +import org.immutables.value.Value; + +import java.util.Map; +import java.util.Optional; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface DataErrorAbstract +{ + + public static final String FILE_NAME = "file"; + public static final String LINE_NUMBER = "line_number"; + public static final String RECORD_NUMBER = "record_number"; + public static final String COLUMN_NAME = "column_name"; + public static final String CHARACTER_POSITION = "character_position"; + public static final String NUM_DUPLICATES = "num_duplicates"; + public static final String NUM_DATA_VERSION_ERRORS = "num_data_version_errors"; + + String errorMessage(); + + ErrorCategory errorCategory(); + + Optional errorRecord(); + + Map errorDetails(); +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java new file mode 100644 index 00000000000..a234a15e855 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/DryRunResultAbstract.java @@ -0,0 +1,34 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
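For orientation, here is a minimal sketch of how a sink implementation might assemble one of the DataError values defined above (DataError being the immutables-generated implementation of DataErrorAbstract). The column name, record-number value, and JSON row snapshot are illustrative assumptions, not values produced by this change:

// Hypothetical helper, for illustration only.
class DataErrorSketch
{
    static DataError typeConversionError()
    {
        return DataError.builder()
            .errorMessage(ErrorCategory.TYPE_CONVERSION.getDefaultErrorMessage())
            .errorCategory(ErrorCategory.TYPE_CONVERSION)
            .errorRecord("{\"col1\":\"abc\"}") // JSON snapshot of the offending row, e.g. as built by ApiUtils.buildErrorRecord
            .putAllErrorDetails(java.util.Collections.singletonMap(DataError.COLUMN_NAME, "col1"))
            .build();
    }
}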
+ +package org.finos.legend.engine.persistence.components.relational.api; + +import org.immutables.value.Value; + +import java.util.List; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public abstract class DryRunResultAbstract +{ + public abstract IngestStatus status(); + + public abstract List errorRecords(); +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java new file mode 100644 index 00000000000..9c12bf39d9a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ErrorCategory.java @@ -0,0 +1,39 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
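A short usage sketch for the result type above; here `ingestor` is assumed to be an initialized RelationalIngestor (its dryRun() entry point appears later in this diff):

// Illustrative only: inspect a dry run before attempting the real ingestion.
DryRunResult dryRunResult = ingestor.dryRun();
if (dryRunResult.status() == IngestStatus.FAILED)
{
    for (DataError error : dryRunResult.errorRecords())
    {
        System.out.println(error.errorCategory() + ": " + error.errorMessage() + " " + error.errorDetails());
    }
}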
+ +package org.finos.legend.engine.persistence.components.relational.api; + +public enum ErrorCategory +{ + TYPE_CONVERSION("Unable to type cast column"), + CHECK_NULL_CONSTRAINT("Null values found in non-nullable column"), + CHECK_OTHER_CONSTRAINT("Table constraints not fulfilled"), + PARSING_ERROR("Unable to parse file"), + FILE_NOT_FOUND("File not found in specified location"), + UNKNOWN("Unknown error"), + DUPLICATES("Duplicate rows found"), + DATA_VERSION_ERROR("Data errors (same PK, same version but different data)"); + + private final String defaultErrorMessage; + + ErrorCategory(String defaultErrorMessage) + { + this.defaultErrorMessage = defaultErrorMessage; + } + + public String getDefaultErrorMessage() + { + return defaultErrorMessage; + } +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 8ecf5cbaa16..30e37aa4c06 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -14,12 +14,16 @@ package org.finos.legend.engine.persistence.components.relational.api; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.eclipse.collections.api.tuple.Pair; +import org.eclipse.collections.impl.tuple.Tuples; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.SqlPlanAbstract; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.immutables.value.Value.Immutable; import org.immutables.value.Value.Style; @@ -27,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; @Immutable @@ -43,6 +48,8 @@ public abstract class GeneratorResultAbstract public abstract SqlPlan preActionsSqlPlan(); + public abstract SqlPlan dryRunPreActionsSqlPlan(); + public abstract Optional initializeLockSqlPlan(); public abstract Optional acquireLockSqlPlan(); @@ -53,6 +60,10 @@ public abstract class GeneratorResultAbstract public abstract SqlPlan ingestSqlPlan(); + public abstract SqlPlan dryRunSqlPlan(); + + public abstract Map, SqlPlan>>> dryRunValidationSqlPlan(); + public abstract Optional ingestDataSplitRange(); public abstract SqlPlan metadataIngestSqlPlan(); @@ -63,7 +74,9 @@ public 
abstract class GeneratorResultAbstract public abstract Optional<SqlPlan> postCleanupSqlPlan(); - public abstract Map<DedupAndVersionErrorStatistics, SqlPlan> deduplicationAndVersioningErrorChecksSqlPlan(); + public abstract SqlPlan dryRunPostCleanupSqlPlan(); + + public abstract Map<DedupAndVersionErrorSqlType, SqlPlan> deduplicationAndVersioningErrorChecksSqlPlan(); public abstract Map<StatisticName, SqlPlan> preIngestStatisticsSqlPlan(); @@ -74,6 +87,11 @@ public List<String> preActionsSql() return preActionsSqlPlan().getSqlList(); } + public List<String> dryRunPreActionsSql() + { + return dryRunPreActionsSqlPlan().getSqlList(); + } + public List<String> initializeLockSql() { return initializeLockSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); @@ -99,6 +117,22 @@ public List<String> ingestSql() .orElseGet(ingestSqlPlan()::getSqlList); } + public List<String> dryRunSql() + { + return dryRunSqlPlan().getSqlList(); + } + + public Map<ValidationCategory, List<Pair<Set<FieldValue>, String>>> dryRunValidationSql() + { + return dryRunValidationSqlPlan().keySet().stream() + .collect(Collectors.toMap( + k -> k, + k -> dryRunValidationSqlPlan().get(k).stream().map( + e -> Tuples.pair(e.getOne(), e.getTwo().getSql()) + ).collect(Collectors.toList()) + )); + } + public List<String> metadataIngestSql() { return metadataIngestSqlPlan().getSqlList(); @@ -119,6 +153,11 @@ public List<String> postCleanupSql() return postCleanupSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); } + public List<String> dryRunPostCleanupSql() + { + return dryRunPostCleanupSqlPlan().getSqlList(); + } + public Map<StatisticName, String> preIngestStatisticsSql() { return preIngestStatisticsSqlPlan().keySet().stream() @@ -127,7 +166,7 @@ public Map<StatisticName, String> preIngestStatisticsSql() k -> preIngestStatisticsSqlPlan().get(k).getSql())); } - public Map<DedupAndVersionErrorStatistics, String> deduplicationAndVersioningErrorChecksSql() + public Map<DedupAndVersionErrorSqlType, String> deduplicationAndVersioningErrorChecksSql() { return deduplicationAndVersioningErrorChecksSqlPlan().keySet().stream() .collect(Collectors.toMap( diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index ed0523fc0a3..7b04d1c29ec 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -14,13 +14,16 @@ package org.finos.legend.engine.persistence.components.relational.api; +import org.eclipse.collections.api.tuple.Pair; +import org.eclipse.collections.impl.tuple.Tuples; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import
org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.planner.Planner; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.planner.Planners; @@ -35,17 +38,20 @@ import org.finos.legend.engine.persistence.components.transformer.Transformer; import org.finos.legend.engine.persistence.components.util.MetadataUtils; import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.immutables.value.Value.Default; import org.immutables.value.Value.Derived; import org.immutables.value.Value.Immutable; import org.immutables.value.Value.Style; import java.time.Clock; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.UUID; import java.util.stream.Collectors; @Immutable @@ -131,6 +137,18 @@ public String batchSuccessStatusValue() return MetadataUtils.MetaTableStatus.DONE.toString(); } + @Default + public int sampleRowCount() + { + return 20; + } + + @Default + public String ingestRunId() + { + return UUID.randomUUID().toString(); + } + //---------- FIELDS ---------- public abstract IngestMode ingestMode(); @@ -149,6 +167,8 @@ protected PlannerOptions plannerOptions() .putAllAdditionalMetadata(additionalMetadata()) .bulkLoadEventIdValue(bulkLoadEventIdValue()) .batchSuccessStatusValue(batchSuccessStatusValue()) + .sampleRowCount(sampleRowCount()) + .ingestRunId(ingestRunId()) .build(); } @@ -219,6 +239,10 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan preActionsLogicalPlan = planner.buildLogicalPlanForPreActions(resources); SqlPlan preActionsSqlPlan = transformer.generatePhysicalPlan(preActionsLogicalPlan); + // dry-run pre-actions + LogicalPlan dryRunPreActionsLogicalPlan = planner.buildLogicalPlanForDryRunPreActions(resources); + SqlPlan dryRunPreActionsSqlPlan = transformer.generatePhysicalPlan(dryRunPreActionsLogicalPlan); + // initialize-lock LogicalPlan initializeLockLogicalPlan = planner.buildLogicalPlanForInitializeLock(resources); Optional initializeLockSqlPlan = Optional.empty(); @@ -235,7 +259,6 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann acquireLockSqlPlan = Optional.of(transformer.generatePhysicalPlan(acquireLockLogicalPlan)); } - // schema evolution Optional schemaEvolutionSqlPlan = Optional.empty(); Optional schemaEvolutionDataset = Optional.empty(); @@ -261,9 +284,9 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann deduplicationAndVersioningSqlPlan = Optional.of(transformer.generatePhysicalPlan(deduplicationAndVersioningLogicalPlan)); } - Map deduplicationAndVersioningErrorChecksLogicalPlan = planner.buildLogicalPlanForDeduplicationAndVersioningErrorChecks(resources); - Map deduplicationAndVersioningErrorChecksSqlPlan = new HashMap<>(); - for (DedupAndVersionErrorStatistics statistic : deduplicationAndVersioningErrorChecksLogicalPlan.keySet()) + Map deduplicationAndVersioningErrorChecksLogicalPlan = planner.buildLogicalPlanForDeduplicationAndVersioningErrorChecks(resources); + Map deduplicationAndVersioningErrorChecksSqlPlan 
= new HashMap<>(); + for (DedupAndVersionErrorSqlType statistic : deduplicationAndVersioningErrorChecksLogicalPlan.keySet()) { deduplicationAndVersioningErrorChecksSqlPlan.put(statistic, transformer.generatePhysicalPlan(deduplicationAndVersioningErrorChecksLogicalPlan.get(statistic))); } @@ -272,6 +295,23 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan ingestLogicalPlan = planner.buildLogicalPlanForIngest(resources); SqlPlan ingestSqlPlan = transformer.generatePhysicalPlan(ingestLogicalPlan); + // dry-run + LogicalPlan dryRunLogicalPlan = planner.buildLogicalPlanForDryRun(resources); + SqlPlan dryRunSqlPlan = transformer.generatePhysicalPlan(dryRunLogicalPlan); + + // dry-run validations + Map<ValidationCategory, List<Pair<Set<FieldValue>, LogicalPlan>>> dryRunValidationLogicalPlan = planner.buildLogicalPlanForDryRunValidation(resources); + Map<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan = new HashMap<>(); + for (ValidationCategory validationCategory : dryRunValidationLogicalPlan.keySet()) + { + dryRunValidationSqlPlan.put(validationCategory, new ArrayList<>()); + for (Pair<Set<FieldValue>, LogicalPlan> pair : dryRunValidationLogicalPlan.get(validationCategory)) + { + SqlPlan sqlplan = transformer.generatePhysicalPlan(pair.getTwo()); + dryRunValidationSqlPlan.get(validationCategory).add(Tuples.pair(pair.getOne(), sqlplan)); + } + } + // metadata ingest LogicalPlan metaDataIngestLogicalPlan = planner.buildLogicalPlanForMetadataIngest(resources); SqlPlan metaDataIngestSqlPlan = transformer.generatePhysicalPlan(metaDataIngestLogicalPlan); @@ -280,6 +320,7 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan postActionsLogicalPlan = planner.buildLogicalPlanForPostActions(resources); SqlPlan postActionsSqlPlan = transformer.generatePhysicalPlan(postActionsLogicalPlan); + // post-cleanup LogicalPlan postCleanupLogicalPlan = planner.buildLogicalPlanForPostCleanup(resources); Optional<SqlPlan> postCleanupSqlPlan = Optional.empty(); if (postCleanupLogicalPlan != null) @@ -287,6 +328,10 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann postCleanupSqlPlan = Optional.of(transformer.generatePhysicalPlan(postCleanupLogicalPlan)); } + // dry-run post-cleanup + LogicalPlan dryRunPostCleanupLogicalPlan = planner.buildLogicalPlanForDryRunPostCleanup(resources); + SqlPlan dryRunPostCleanupSqlPlan = transformer.generatePhysicalPlan(dryRunPostCleanupLogicalPlan); + // post-run statistics Map<StatisticName, LogicalPlan> postIngestStatisticsLogicalPlan = planner.buildLogicalPlanForPostRunStatistics(resources); Map<StatisticName, SqlPlan> postIngestStatisticsSqlPlan = new HashMap<>(); @@ -297,13 +342,17 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann return GeneratorResult.builder() .preActionsSqlPlan(preActionsSqlPlan) + .dryRunPreActionsSqlPlan(dryRunPreActionsSqlPlan) .initializeLockSqlPlan(initializeLockSqlPlan) .acquireLockSqlPlan(acquireLockSqlPlan) .schemaEvolutionSqlPlan(schemaEvolutionSqlPlan) .schemaEvolutionDataset(schemaEvolutionDataset) .ingestSqlPlan(ingestSqlPlan) + .dryRunSqlPlan(dryRunSqlPlan) + .putAllDryRunValidationSqlPlan(dryRunValidationSqlPlan) .postActionsSqlPlan(postActionsSqlPlan) .postCleanupSqlPlan(postCleanupSqlPlan) + .dryRunPostCleanupSqlPlan(dryRunPostCleanupSqlPlan) .metadataIngestSqlPlan(metaDataIngestSqlPlan) .deduplicationAndVersioningSqlPlan(deduplicationAndVersioningSqlPlan) .putAllDeduplicationAndVersioningErrorChecksSqlPlan(deduplicationAndVersioningErrorChecksSqlPlan) diff --git
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 60a5310c55d..5b4a8e678ee 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -23,6 +23,8 @@ import org.finos.legend.engine.persistence.components.importer.Importer; import org.finos.legend.engine.persistence.components.importer.Importers; import org.finos.legend.engine.persistence.components.ingestmode.*; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicationHandler; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DeriveDataErrorRowsLogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; @@ -32,6 +34,7 @@ import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; +import org.finos.legend.engine.persistence.components.relational.exception.DataQualityException; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; @@ -42,6 +45,7 @@ import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataUtils; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.TableNameGenUtils; import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; import org.finos.legend.engine.persistence.components.util.SqlLogging; import org.immutables.value.Value.Default; @@ -56,8 +60,12 @@ import java.time.LocalDateTime; import java.util.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.*; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.TABLE_IS_NON_EMPTY; import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.*; +import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.retrieveValueAsLong; +import static org.finos.legend.engine.persistence.components.relational.api.DataErrorAbstract.NUM_DATA_VERSION_ERRORS; +import static org.finos.legend.engine.persistence.components.relational.api.DataErrorAbstract.NUM_DUPLICATES; import static 
org.finos.legend.engine.persistence.components.relational.api.RelationalGeneratorAbstract.BULK_LOAD_BATCH_STATUS_PATTERN; import static org.finos.legend.engine.persistence.components.transformer.Transformer.TransformOptionsAbstract.DATE_TIME_FORMATTER; @@ -162,6 +170,18 @@ public String batchSuccessStatusValue() return MetadataUtils.MetaTableStatus.DONE.toString(); } + @Default + public int sampleRowCount() + { + return 20; + } + + @Derived + public String getIngestRunId() + { + return UUID.randomUUID().toString(); + } + //---------- FIELDS ---------- public abstract IngestMode ingestMode(); @@ -189,7 +209,6 @@ protected TransformOptions transformOptions() private GeneratorResult generatorResult; boolean mainDatasetExists; private Planner planner; - private boolean datasetsInitialized = false; // ---------- API ---------- @@ -259,6 +278,21 @@ public SchemaEvolutionResult evolve() return schemaEvolveResult; } + /* + - Perform dry run of Ingestion - only supported for Bulk Load + */ + public DryRunResult dryRun() + { + LOGGER.info("Invoked dryRun method, will perform the dryRun"); + validateDatasetsInitialization(); + List dataErrors = performDryRun(); + IngestStatus ingestStatus = dataErrors.isEmpty() ? IngestStatus.SUCCEEDED : IngestStatus.FAILED; + DryRunResult dryRunResult = DryRunResult.builder().status(ingestStatus).addAllErrorRecords(dataErrors).build(); + LOGGER.info("DryRun completed"); + return dryRunResult; + } + + /* - Perform ingestion from staging to main dataset based on the Ingest mode, executes in current transaction */ @@ -415,24 +449,43 @@ public void dedupAndVersion() { LOGGER.info("Executing Deduplication and Versioning"); executor.executePhysicalPlan(generatorResult.deduplicationAndVersioningSqlPlan().get()); - Map errorStatistics = executeDeduplicationAndVersioningErrorChecks(executor, generatorResult.deduplicationAndVersioningErrorChecksSqlPlan()); - /* Error Checks - 1. if Dedup = fail on dups, Fail the job if count > 1 - 2. 
If versioning = Max Version/ All Version, Check for data error - */ - Optional<Long> maxDuplicatesValue = retrieveValueAsLong(errorStatistics.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Optional<Long> maxDataErrorsValue = retrieveValueAsLong(errorStatistics.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); - if (maxDuplicatesValue.isPresent() && maxDuplicatesValue.get() > 1) + + Map<DedupAndVersionErrorSqlType, SqlPlan> dedupAndVersionErrorSqlTypeSqlPlanMap = generatorResult.deduplicationAndVersioningErrorChecksSqlPlan(); + + // Error Check for Duplicates: if Dedup = fail on dups, Fail the job if count > 1 + if (dedupAndVersionErrorSqlTypeSqlPlanMap.containsKey(MAX_DUPLICATES)) { - String errorMessage = "Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy"; - LOGGER.error(errorMessage); - throw new RuntimeException(errorMessage); + List<TabularData> result = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(MAX_DUPLICATES)); + Optional<Object> obj = getFirstColumnValue(getFirstRowForFirstResult(result)); + Optional<Long> maxDuplicatesValue = retrieveValueAsLong(obj.orElse(null)); + if (maxDuplicatesValue.isPresent() && maxDuplicatesValue.get() > 1) + { + // Find the duplicate rows + TabularData duplicateRows = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(DUPLICATE_ROWS)).get(0); + String errorMessage = "Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy"; + LOGGER.error(errorMessage); + List<DataError> dataErrors = ApiUtils.constructDataQualityErrors(enrichedDatasets.stagingDataset(), duplicateRows.getData(), + ErrorCategory.DUPLICATES, caseConversion(), DatasetDeduplicationHandler.COUNT, NUM_DUPLICATES); + throw new DataQualityException(errorMessage, dataErrors); + } } - if (maxDataErrorsValue.isPresent() && maxDataErrorsValue.get() > 1) + + // Error Check for Data Error: If versioning = Max Version/ All Versioning, Check for data error + if (dedupAndVersionErrorSqlTypeSqlPlanMap.containsKey(MAX_DATA_ERRORS)) { - String errorMessage = "Encountered Data errors (same PK, same version but different data), hence failing the batch"; - LOGGER.error(errorMessage); - throw new RuntimeException(errorMessage); + List<TabularData> result = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(MAX_DATA_ERRORS)); + Optional<Object> obj = getFirstColumnValue(getFirstRowForFirstResult(result)); + Optional<Long> maxDataErrorsValue = retrieveValueAsLong(obj.orElse(null)); + if (maxDataErrorsValue.isPresent() && maxDataErrorsValue.get() > 1) + { + // Find the data errors + TabularData errors = executor.executePhysicalPlanAndGetResults(dedupAndVersionErrorSqlTypeSqlPlanMap.get(DATA_ERROR_ROWS)).get(0); + String errorMessage = "Encountered Data errors (same PK, same version but different data), hence failing the batch"; + LOGGER.error(errorMessage); + List<DataError> dataErrors = ApiUtils.constructDataQualityErrors(enrichedDatasets.stagingDataset(), errors.getData(), + ErrorCategory.DATA_VERSION_ERROR, caseConversion(), DeriveDataErrorRowsLogicalPlan.DATA_VERSION_ERROR_COUNT, NUM_DATA_VERSION_ERRORS); + throw new DataQualityException(errorMessage, dataErrors); + } } } } @@ -490,6 +543,21 @@ private List<IngestorResult> ingest(List<DataSplitRange> dataSplitRanges, Schema } } + private List<DataError> performDryRun() + { + if (enrichedIngestMode instanceof BulkLoad) + { + executor.executePhysicalPlan(generatorResult.dryRunPreActionsSqlPlan()); + List<DataError> results = relationalSink().performDryRun(enrichedDatasets, transformer, executor, generatorResult.dryRunSqlPlan(),
generatorResult.dryRunValidationSqlPlan(), sampleRowCount(), caseConversion()); + executor.executePhysicalPlan(generatorResult.dryRunPostCleanupSqlPlan()); + return results; + } + else + { + throw new RuntimeException("Dry Run not supported for this ingest mode: " + enrichedIngestMode.getClass().getSimpleName()); + } + } + private List<IngestorResult> performFullIngestion(RelationalConnection connection, Datasets datasets, List<DataSplitRange> dataSplitRanges) { // 1. init @@ -547,6 +615,7 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) { throw new IllegalStateException("Executor not initialized, call init(Connection) before invoking this method!"); } + // 1. Case handling enrichedIngestMode = ApiUtils.applyCase(ingestMode(), caseConversion()); enrichedDatasets = ApiUtils.enrichAndApplyCase(datasets, caseConversion()); @@ -611,6 +680,8 @@ private Datasets enrichDatasetsAndGenerateOperations(Datasets datasets) .putAllAdditionalMetadata(placeholderAdditionalMetadata) .bulkLoadEventIdValue(bulkLoadEventIdValue()) .batchSuccessStatusValue(batchSuccessStatusValue()) + .sampleRowCount(sampleRowCount()) + .ingestRunId(getIngestRunId()) .build(); planner = Planners.get(enrichedDatasets, enrichedIngestMode, generator.plannerOptions(), relationalSink().capabilities()); @@ -702,7 +773,7 @@ private Datasets importExternalDataset(Datasets datasets) DatasetReference mainDataSetReference = datasets.mainDataset().datasetReference(); externalDatasetReference = externalDatasetReference - .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : LogicalPlanUtils.generateTableNameWithSuffix(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING)) + .withName(externalDatasetReference.name().isPresent() ? externalDatasetReference.name().get() : TableNameGenUtils.generateTableName(mainDataSetReference.name().orElseThrow(IllegalStateException::new), STAGING, getIngestRunId())) + .withDatabase(externalDatasetReference.database().isPresent() ? externalDatasetReference.database().get() : mainDataSetReference.database().orElse(null)) + .withGroup(externalDatasetReference.group().isPresent() ? externalDatasetReference.group().get() : mainDataSetReference.group().orElse(null)) + .withAlias(externalDatasetReference.alias().isPresent() ?
externalDatasetReference.alias().get() : mainDataSetReference.alias().orElseThrow(RuntimeException::new) + UNDERSCORE + STAGING); @@ -764,20 +835,6 @@ private Map<StatisticName, Object> executeStatisticsPhysicalPlan(Executor<SqlGen, TabularData, SqlPlan> - private Map<DedupAndVersionErrorStatistics, Object> executeDeduplicationAndVersioningErrorChecks(Executor<SqlGen, TabularData, SqlPlan> executor, - Map<DedupAndVersionErrorStatistics, SqlPlan> errorChecksPlan) - { - Map<DedupAndVersionErrorStatistics, Object> results = new HashMap<>(); - for (Map.Entry<DedupAndVersionErrorStatistics, SqlPlan> entry: errorChecksPlan.entrySet()) - { - List<TabularData> result = executor.executePhysicalPlanAndGetResults(entry.getValue()); - Optional<Object> obj = getFirstColumnValue(getFirstRowForFirstResult(result)); - Object value = obj.orElse(null); - results.put(entry.getKey(), value); - } - return results; - } - private Map<String, PlaceholderValue> extractPlaceHolderKeyValues(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, Planner planner, Transformer<SqlGen, SqlPlan> transformer, IngestMode ingestMode, Optional<DataSplitRange> dataSplitRange) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java new file mode 100644 index 00000000000..eb8c44cf446 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/exception/DataQualityException.java @@ -0,0 +1,35 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
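The exception introduced below carries the sampled offending rows alongside the message, so callers can surface row-level detail instead of just a failure string; a minimal usage sketch, assuming an initialized RelationalIngestor named `ingestor`:

// Illustrative only: dedupAndVersion() now throws DataQualityException rather than a bare RuntimeException.
try
{
    ingestor.dedupAndVersion();
}
catch (DataQualityException e)
{
    for (DataError error : e.getDataErrors())
    {
        System.err.println(error.errorCategory() + ": " + error.errorMessage() + " " + error.errorDetails());
    }
}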
+ +package org.finos.legend.engine.persistence.components.relational.exception; + +import org.finos.legend.engine.persistence.components.relational.api.DataError; + +import java.util.List; + +public class DataQualityException extends RuntimeException +{ + private List dataErrors; + + public List getDataErrors() + { + return dataErrors; + } + + public DataQualityException(String message, List dataErrors) + { + super(message); + this.dataErrors = dataErrors; + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java index 8e8e509abf3..76857df5f2f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java @@ -81,6 +81,22 @@ public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan) return resultSetList; } + @Override + public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, int rows) + { + List resultSetList = new ArrayList<>(); + for (String sql : physicalPlan.getSqlList()) + { + SqlUtils.logSql(LOGGER, sqlLogging, sql); + List> queryResult = relationalExecutionHelper.executeQuery(sql, rows); + if (!queryResult.isEmpty()) + { + resultSetList.add(new TabularData(queryResult)); + } + } + return resultSetList; + } + @Override public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan, Map placeholderKeyValues) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java index f1136234cbb..723927863ec 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java @@ -470,6 +470,42 @@ public void executeStatements(List sqls) } } + @Override + public List> executeQuery(String sql, int rows) + { + if (this.transactionManager != null) + { + return this.transactionManager.convertResultSetToList(sql, rows); + } + else + { + JdbcTransactionManager txManager = null; + try + { + txManager = new JdbcTransactionManager(connection); + return txManager.convertResultSetToList(sql, rows); + } + catch (Exception e) + { + throw new RuntimeException("Error executing SQL query: " + sql, e); + } + 
finally + { + if (txManager != null) + { + try + { + txManager.close(); + } + catch (SQLException e) + { + LOGGER.error("Error closing transaction manager.", e); + } + } + } + } + } + @Override public List> executeQuery(String sql) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java index 09b25ea1c2b..2dfa4abb349 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcTransactionManager.java @@ -83,19 +83,7 @@ public List> convertResultSetToList(String sql) { while (resultSet.next()) { - ResultSetMetaData metaData = resultSet.getMetaData(); - int columnCount = resultSet.getMetaData().getColumnCount(); - Map row = new HashMap<>(); - for (int i = 1; i <= columnCount; i++) - { - Object value = resultSet.getObject(i); - if (metaData.getColumnTypeName(i).equalsIgnoreCase("JSON") && value instanceof byte[]) - { - value = new String((byte[]) value, StandardCharsets.UTF_8); - } - row.put(metaData.getColumnName(i), value); - } - resultList.add(row); + extractResults(resultList, resultSet); } } return resultList; @@ -105,4 +93,43 @@ public List> convertResultSetToList(String sql) throw new RuntimeException(e); } } + + public List> convertResultSetToList(String sql, int rows) + { + try + { + List> resultList = new ArrayList<>(); + try (ResultSet resultSet = this.statement.executeQuery(sql)) + { + int iter = 0; + while (resultSet.next() && iter < rows) + { + iter++; + extractResults(resultList, resultSet); + } + } + return resultList; + } + catch (SQLException e) + { + throw new RuntimeException(e); + } + } + + private static void extractResults(List> resultList, ResultSet resultSet) throws SQLException + { + ResultSetMetaData metaData = resultSet.getMetaData(); + int columnCount = resultSet.getMetaData().getColumnCount(); + Map row = new HashMap<>(); + for (int i = 1; i <= columnCount; i++) + { + Object value = resultSet.getObject(i); + if (metaData.getColumnTypeName(i).equalsIgnoreCase("JSON") && value instanceof byte[]) + { + value = new String((byte[]) value, StandardCharsets.UTF_8); + } + row.put(metaData.getColumnName(i), value); + } + resultList.add(row); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java index 712a5da2699..9b9216e8b1d 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java @@ -31,6 +31,7 @@ public enum Clause MERGE_INTO("MERGE INTO"), USING("USING"), WHERE("WHERE"), + HAVING("HAVING"), SET("SET"), EXISTS("EXISTS"), WHEN_MATCHED("WHEN MATCHED"), @@ -60,6 +61,9 @@ public enum Clause NOT_ENFORCED("NOT ENFORCED"), DATA_TYPE("DATA TYPE"), CONVERT("CONVERT"), + CAST("CAST"), + TRY_CAST("TRY_CAST"), + AS("AS"), ARRAY("ARRAY"), LOAD_DATA("LOAD DATA"), OVERWRITE("OVERWRITE"), diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java index 16d11fe9c9c..a3176a9c3e4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/SelectStatement.java @@ -43,24 +43,39 @@ public class SelectStatement extends SelectExpression implements DMLStatement private final List selectItems; private Long selectItemsSize; private final List tables; - private Condition condition; + private Condition whereCondition; private final List groupByFields; + private Condition havingCondition; private Integer limit; + private boolean hasWhereCondition; public SelectStatement() { this.selectItems = new ArrayList<>(); this.tables = new ArrayList<>(); this.groupByFields = new ArrayList<>(); + this.hasWhereCondition = false; } - public SelectStatement(Quantifier quantifier, List selectItems, List tables, Condition condition, List groupByFields) + public SelectStatement(Quantifier quantifier, List selectItems, List tables, Condition whereCondition) { this.quantifier = quantifier; this.selectItems = selectItems; this.tables = tables; - this.condition = condition; + this.whereCondition = whereCondition; + this.hasWhereCondition = whereCondition != null; + this.groupByFields = new ArrayList<>(); + } + + public SelectStatement(Quantifier quantifier, List selectItems, List tables, Condition whereCondition, List groupByFields, Condition havingCondition) + { + this.quantifier = quantifier; + this.selectItems = selectItems; + this.tables = tables; + this.whereCondition = whereCondition; this.groupByFields = groupByFields; + this.hasWhereCondition = whereCondition != null; + this.havingCondition = havingCondition; } /* @@ -107,10 +122,10 @@ public void genSql(StringBuilder builder) throws SqlDomException } // Add where clause - if (condition != null) + if (whereCondition != null) { builder.append(WHITE_SPACE + 
Clause.WHERE.get() + WHITE_SPACE); - condition.genSql(builder); + whereCondition.genSql(builder); } // Add group by clause @@ -130,6 +145,13 @@ public void genSql(StringBuilder builder) throws SqlDomException } } + // Add having clause + if (havingCondition != null) + { + builder.append(WHITE_SPACE + Clause.HAVING.get() + WHITE_SPACE); + havingCondition.genSql(builder); + } + // Add limit clause if (limit != null) { @@ -156,7 +178,14 @@ public void push(Object node) } else if (node instanceof Condition) { - condition = (Condition) node; + if (whereCondition == null && hasWhereCondition) + { + whereCondition = (Condition) node; + } + else + { + havingCondition = (Condition) node; + } } else if (node instanceof Quantifier) { @@ -216,4 +245,9 @@ public void setLimit(int limit) { this.limit = limit; } + + public void setHasWhereCondition(boolean hasWhereCondition) + { + this.hasWhereCondition = hasWhereCondition; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java index 243caae8648..80208a83db1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/WindowFunction.java @@ -73,7 +73,6 @@ public void genSql(StringBuilder builder) throws SqlDomException @Override public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException { - validate(); function.genSqlWithoutAlias(builder); builder.append(WHITE_SPACE); builder.append(OVER); @@ -81,7 +80,7 @@ public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException builder.append(OPEN_PARENTHESIS); // Add Partition By - if (partitionByFields != null) + if (partitionByFields != null && !partitionByFields.isEmpty()) { builder.append(PARTITION_BY.get() + WHITE_SPACE); for (int ctr = 0; ctr < partitionByFields.size(); ctr++) @@ -122,12 +121,4 @@ else if (node instanceof Field) partitionByFields.add((Field) node); } } - - void validate() throws SqlDomException - { - if ((partitionByFields == null || partitionByFields.isEmpty()) && (orderByFields == null || orderByFields.isEmpty())) - { - throw new SqlDomException("Both partitionByFields and orderByFields are empty"); - } - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java index f785f09d51a..c6164052c06 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ConditionTest.java @@ -125,8 +125,7 @@ void testExistsCondition() null, Collections.singletonList(item), Collections.singletonList(table), - condition, - Collections.emptyList()); + condition); Condition existsCondition = new ExistsCondition(selectExpression); String expected = "EXISTS (SELECT \"item1\" FROM \"mydb\".\"mytable\" WHERE \"item1\" = 1)"; @@ -161,8 +160,7 @@ void testInSelectCondition() null, Collections.singletonList(field2), Collections.singletonList(tableB), - null, - Collections.emptyList()); + null); String expected = "sink.\"col1\" IN (SELECT stage.\"col2\" FROM \"mydb\".\"mytable2\" as stage)"; Condition condition = new InCondition(field1, selectExpression); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java index 706cbd9e2f9..228aa82635c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/InsertTest.java @@ -96,8 +96,7 @@ void testInsertWithSelect() null, Arrays.asList(item1, item2, item3, item4, item5, item6, item7, item8, item9, item10, item11, item12), Collections.singletonList(tableToSelect), - null, - Collections.emptyList()); + null); InsertStatement insertStatement = new InsertStatement(tableToInsert, columns, selectExpression); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java index aea7afd123d..36b0dbc8207 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/SelectExpressionTest.java @@ -19,6 +19,7 @@ import 
org.finos.legend.engine.persistence.components.relational.sqldom.quantifiers.DistinctQuantifier; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.Condition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.comparison.EqualityCondition; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.comparison.GreaterThanCondition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.comparison.NotEqualCondition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.conditions.logical.AndCondition; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.select.SelectExpression; @@ -55,8 +56,7 @@ void genSqlForSimpleSelect() new DistinctQuantifier(), Arrays.asList(item1, item2), Collections.singletonList(table), - null, - Collections.emptyList()); + null); String sql1 = BaseTest.genSqlIgnoringErrors(selectExpression); String expected = "SELECT DISTINCT \"item1\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\""; @@ -73,8 +73,7 @@ void genSqlForSimpleSelectStarWithLimit() null, Collections.singletonList(new All(BaseTest.QUOTE_IDENTIFIER)), Collections.singletonList(table), - null, - Collections.emptyList()); + null); selectStatement.setLimit(10); String sql1 = BaseTest.genSqlIgnoringErrors(selectStatement); @@ -99,8 +98,7 @@ void genSqlForCondtionalSelect() null, Arrays.asList(item1, item2), Collections.singletonList(table), - condition, - Collections.emptyList()); + condition); String sql1 = BaseTest.genSqlIgnoringErrors(selectExpression); String expected = "SELECT \"item1\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" WHERE (\"item1\" = 100) AND (\"item2\" <> 50)"; @@ -127,8 +125,7 @@ void genSqlForInnerJoin() new DistinctQuantifier(), Arrays.asList(item1, item2, item3), Collections.singletonList(table), - null, - Collections.emptyList()); + null); String sql1 = BaseTest.genSqlIgnoringErrors(selectExpression); String expected = "SELECT DISTINCT A.\"id\",A.\"item2\",B.\"item3\" as \"my_item\" FROM \"mydb\".\"left\" as A INNER JOIN \"mydb\".\"right\" as B ON A.\"id\" = B.\"id\""; @@ -144,8 +141,7 @@ void genSqlForSelectSelectItemsMissing() new DistinctQuantifier(), Collections.emptyList(), Collections.singletonList(table), - null, - Collections.emptyList()); + null); try { BaseTest.genSql(selectExpression); @@ -164,19 +160,47 @@ void genSqlForSelectWithConditionAndGroupBy() Field item1 = new Field(null, "item1", BaseTest.QUOTE_IDENTIFIER, null); Field item2 = new Field(null, "item2", BaseTest.QUOTE_IDENTIFIER, "my_item"); - Condition condition = new NotEqualCondition(item2, new NumericalValue(50L, BaseTest.QUOTE_IDENTIFIER)); + Condition whereCondition = new NotEqualCondition(item2, new NumericalValue(50L, BaseTest.QUOTE_IDENTIFIER)); + Condition havingCondition = new GreaterThanCondition(new Field("count", BaseTest.QUOTE_IDENTIFIER), new NumericalValue(1L, BaseTest.QUOTE_IDENTIFIER)); Function countFunction = new Function(FunctionName.COUNT, Collections.singletonList(item1), BaseTest.QUOTE_IDENTIFIER); + countFunction.setAlias("count"); SelectExpression selectExpression = new SelectStatement( null, Arrays.asList(countFunction, item2), Collections.singletonList(table), - condition, - Collections.singletonList(item2)); + whereCondition, + Collections.singletonList(item2), + havingCondition); + + String sql = BaseTest.genSqlIgnoringErrors(selectExpression); + String 
expected = "SELECT COUNT(\"item1\") as \"count\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" WHERE \"item2\" <> 50 GROUP BY \"item2\" HAVING \"count\" > 1"; + assertEquals(expected, sql); + } + + @Test + void genSqlForSelectWithoutConditionAndGroupBy() + { + Table table = new Table("mydb", null, "mytable", null, BaseTest.QUOTE_IDENTIFIER); + + Field item1 = new Field(null, "item1", BaseTest.QUOTE_IDENTIFIER, null); + Field item2 = new Field(null, "item2", BaseTest.QUOTE_IDENTIFIER, "my_item"); + Condition havingCondition = new GreaterThanCondition(new Field("count", BaseTest.QUOTE_IDENTIFIER), new NumericalValue(1L, BaseTest.QUOTE_IDENTIFIER)); + Function countFunction = new Function(FunctionName.COUNT, Collections.singletonList(item1), BaseTest.QUOTE_IDENTIFIER); + countFunction.setAlias("count"); + + SelectExpression selectExpression = + new SelectStatement( + null, + Arrays.asList(countFunction, item2), + Collections.singletonList(table), + null, + Collections.singletonList(item2), + havingCondition); String sql = BaseTest.genSqlIgnoringErrors(selectExpression); - String expected = "SELECT COUNT(\"item1\"),\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" WHERE \"item2\" <> 50 GROUP BY \"item2\""; + String expected = "SELECT COUNT(\"item1\") as \"count\",\"item2\" as \"my_item\" FROM \"mydb\".\"mytable\" GROUP BY \"item2\" HAVING \"count\" > 1"; assertEquals(expected, sql); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java index 4827ff062ef..8121d0dd091 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/UpdateStatementTest.java @@ -102,16 +102,14 @@ void genSqlForUpdateWithJoin() null, Collections.singletonList(new Field(joinTable.getAlias(), "col1", BaseTest.QUOTE_IDENTIFIER, null)), Collections.singletonList(joinTable), - pkMatchCondition, - Collections.emptyList()), + pkMatchCondition), BaseTest.QUOTE_IDENTIFIER)), new Pair<>(new Field("col2", BaseTest.QUOTE_IDENTIFIER), new SelectValue( new SelectStatement( null, Collections.singletonList(new Field(joinTable.getAlias(), "col2", BaseTest.QUOTE_IDENTIFIER, null)), Collections.singletonList(joinTable), - pkMatchCondition, - Collections.emptyList()), + pkMatchCondition), BaseTest.QUOTE_IDENTIFIER))); Condition whereCondition = new ExistsCondition( @@ -119,8 +117,7 @@ void genSqlForUpdateWithJoin() null, Collections.singletonList(new All(BaseTest.QUOTE_IDENTIFIER)), Collections.singletonList(joinTable), - pkMatchCondition, - Collections.emptyList())); + pkMatchCondition)); UpdateStatement query = new UpdateStatement(table, setPairs, whereCondition); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java index 9835faeaba9..771fbd635e4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/WindowFunctionTest.java @@ -21,7 +21,6 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Function; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.OrderedField; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.WindowFunction; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.Arrays; @@ -32,23 +31,6 @@ public class WindowFunctionTest { - - @Test - void testBothPartitionAndOrderByFieldsMissing() - { - Function rowNumber = new Function(FunctionName.ROW_NUMBER, null, BaseTest.QUOTE_IDENTIFIER); - WindowFunction windowFunction = new WindowFunction(BaseTest.QUOTE_IDENTIFIER, rowNumber, null, null); - try - { - String sql = BaseTest.genSql(windowFunction); - Assertions.fail("Should have thrown Exception"); - } - catch (Exception e) - { - assertEquals("Both partitionByFields and orderByFields are empty", e.getMessage()); - } - } - @Test void testWithPartitionFields() { @@ -99,4 +81,12 @@ void testWithPartitionFieldsAndOrderByFields() assertEquals("ROW_NUMBER() OVER (PARTITION BY stage.\"field1\",stage.\"field2\" ORDER BY stage.\"field1\" ASC,stage.\"field2\")", sql); } + @Test + void testPartitionAndOrderByBothEmpty() + { + Function rowNumber = new Function(FunctionName.ROW_NUMBER, null, BaseTest.QUOTE_IDENTIFIER); + WindowFunction windowFunction = new WindowFunction(BaseTest.QUOTE_IDENTIFIER, rowNumber, null, null); + String sql = BaseTest.genSql(windowFunction); + assertEquals("ROW_NUMBER() OVER ()", sql); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml index f371af294dc..58d4ea210d8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml @@ -50,6 +50,13 @@ value + + + org.eclipse.collections + eclipse-collections-api + + + com.h2database diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index d2a986eaffd..1bffbd37bb8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -14,11 +14,16 @@ package org.finos.legend.engine.persistence.components.relational.h2; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; import java.util.Optional; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; @@ -29,9 +34,13 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.LoadCsv; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.HashFunction; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -39,6 +48,10 @@ import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.LowerCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; +import org.finos.legend.engine.persistence.components.relational.api.ApiUtils; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import 
org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; @@ -49,6 +62,8 @@ import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.DigestUdfVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.HashFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.LoadCsvVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.MetadataFileNameFieldVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.MetadataRowNumberFieldVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.SchemaDefinitionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ParseJsonFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.FieldVisitor; @@ -57,16 +72,18 @@ import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesSelectionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ToArrayFunctionVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.TryCastFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.Transformer; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; -import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.relational.executor.RelationalExecutor; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import java.sql.Connection; import java.sql.DriverManager; @@ -76,10 +93,14 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Queue; import java.util.Set; +import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.TYPE_CONVERSION; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUE; public class H2Sink extends AnsiSqlSink { @@ -100,6 +121,7 @@ public class H2Sink extends AnsiSqlSink capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.DATA_TYPE_SCALE_CHANGE); capabilities.add(Capability.TRANSFORM_WHILE_COPY); + capabilities.add(Capability.DRY_RUN); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map<Class<?>, LogicalPlanVisitor<?>> logicalPlanVisitorByClass = new HashMap<>(); @@ -116,6 +138,9 @@ public class H2Sink extends
AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); logicalPlanVisitorByClass.put(ToArrayFunction.class, new ToArrayFunctionVisitor()); + logicalPlanVisitorByClass.put(TryCastFunction.class, new TryCastFunctionVisitor()); + logicalPlanVisitorByClass.put(MetadataFileNameField.class, new MetadataFileNameFieldVisitor()); + logicalPlanVisitorByClass.put(MetadataRowNumberField.class, new MetadataRowNumberFieldVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); Map<DataType, Set<DataType>> implicitDataTypeMapping = new HashMap<>(); @@ -236,4 +261,104 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor<SqlGen, TabularData, SqlPlan> executor, + public List<DataError> performDryRun(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) + { + try + { + return performDryRunWithValidationQueries(datasets, transformer, executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount, caseConversion); + } + catch (Exception e) + { + return parseH2Exceptions(e); + } + } + + private List<DataError> parseH2Exceptions(Exception e) + { + String errorMessage = e.getMessage(); + String errorMessageWithoutLineBreak = ApiUtils.removeLineBreaks(errorMessage); + + if (errorMessage.contains("IO Exception")) + { + Optional<String> fileName = extractProblematicValueFromErrorMessage(errorMessage); + Map<String, Object> errorDetails = buildErrorDetails(fileName, Optional.empty(), Optional.empty()); + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND).errorMessage(ErrorCategory.FILE_NOT_FOUND.getDefaultErrorMessage()).putAllErrorDetails(errorDetails).build()); + } + + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessageWithoutLineBreak).build()); + } + + public List<DataError> performDryRunWithValidationQueries(Datasets datasets, Transformer<SqlGen, SqlPlan> transformer, Executor<SqlGen, TabularData, SqlPlan> executor, SqlPlan dryRunSqlPlan, Map<ValidationCategory, List<Pair<Set<FieldValue>, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) + { + executor.executePhysicalPlan(dryRunSqlPlan); + + int dataErrorsTotalCount = 0; + Map<ValidationCategory, Queue<DataError>> dataErrorsByCategory = new HashMap<>(); + for (ValidationCategory validationCategory : ValidationCategory.values()) + { + dataErrorsByCategory.put(validationCategory, new LinkedList<>()); + } + + List<String> allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); + + List<Pair<Set<FieldValue>, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUE, new ArrayList<>()); + List<Pair<Set<FieldValue>, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(TYPE_CONVERSION, new ArrayList<>()); + + // Execute queries for null values + dataErrorsTotalCount += findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion); + + // Execute queries for datatype conversion + for (Pair<Set<FieldValue>, SqlPlan> pair : queriesForDatatype) + { + try + { + executor.executePhysicalPlanAndGetResults(pair.getTwo()); + } + catch (RuntimeException e) + { + Optional<String> problematicValue = extractProblematicValueFromErrorMessage(e.getCause().getMessage()); + if (problematicValue.isPresent()) + { + // This loop will only be executed once as there is always only one element in the set + for (FieldValue validatedColumn : pair.getOne()) + { + List<TabularData> results =
executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(LogicalPlanFactory.getLogicalPlanForSelectAllFieldsWithStringFieldEquals(validatedColumn, problematicValue.get())), sampleRowCount); + if (!results.isEmpty()) + { + List<Map<String, Object>> resultSets = results.get(0).getData(); + for (Map<String, Object> row : resultSets) + { + DataError dataError = constructDataError(allFields, row, TYPE_CONVERSION, validatedColumn.fieldName(), caseConversion); + dataErrorsByCategory.get(TYPE_CONVERSION).add(dataError); + dataErrorsTotalCount++; + } + } + } + } + + } + } + + return getDataErrorsWithFairDistributionAcrossCategories(sampleRowCount, dataErrorsTotalCount, dataErrorsByCategory); + } + + private Optional<String> extractProblematicValueFromErrorMessage(String errorMessage) + { + errorMessage = errorMessage.substring(0, errorMessage.indexOf("; SQL statement")); + Optional<String> value = Optional.empty(); + if (errorMessage.contains("Data conversion error")) + { + value = ApiUtils.findToken(errorMessage, "Data conversion error converting \"(.*)\"", 1); + } + else if (errorMessage.contains("Cannot parse")) + { + value = ApiUtils.findToken(errorMessage, "Cannot parse \"(.*)\" constant \"(.*)\"", 2); + } + else if (errorMessage.contains("IO Exception")) + { + value = ApiUtils.findToken(errorMessage, "IO Exception: \"IOException reading (.*)\"", 1); + } + return value; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataFileNameFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataFileNameFieldVisitor.java new file mode 100644 index 00000000000..12dc58dc423 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataFileNameFieldVisitor.java @@ -0,0 +1,39 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
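+ +// H2's CSVREAD exposes no per-row file-name metadata, so this visitor resolves the logical +// MetadataFileNameField to a string literal holding the single staged file path and delegates +// SQL generation to the ANSI StringValueVisitor.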
+ +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.StringValueVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataFileNameFieldVisitor implements LogicalPlanVisitor<MetadataFileNameField> +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataFileNameField current, VisitorContext context) + { + if (!(current.stagedFilesDatasetProperties() instanceof H2StagedFilesDatasetProperties)) + { + throw new IllegalStateException("Only H2StagedFilesDatasetProperties are supported for H2 Sink"); + } + H2StagedFilesDatasetProperties datasetProperties = (H2StagedFilesDatasetProperties) current.stagedFilesDatasetProperties(); + + StringValue stringValue = StringValue.of(datasetProperties.filePaths().get(0)); + return new StringValueVisitor().visit(prev, stringValue, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataRowNumberFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataRowNumberFieldVisitor.java new file mode 100644 index 00000000000..173f509dbe7 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/MetadataRowNumberFieldVisitor.java @@ -0,0 +1,36 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
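+ +// Resolves the logical MetadataRowNumberField to a ROW_NUMBER() window function with no +// partition or ordering, so every staged row receives a sequential number that the dry run +// can report back as the error row number.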
+ +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; +import org.finos.legend.engine.persistence.components.logicalplan.values.WindowFunction; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.WindowFunctionVisitor; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataRowNumberFieldVisitor implements LogicalPlanVisitor<MetadataRowNumberField> +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataRowNumberField current, VisitorContext context) + { + WindowFunction windowFunction = WindowFunction.builder() + .windowFunction(FunctionImpl.builder().functionName(FunctionName.ROW_NUMBER).build()) + .build(); + return new WindowFunctionVisitor().visit(prev, windowFunction, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java new file mode 100644 index 00000000000..46538b40475 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/TryCastFunctionVisitor.java @@ -0,0 +1,49 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
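+ +// H2 has no TRY_CAST, so the logical TryCastFunction is emitted as a plain CAST(field AS type); +// a failed conversion surfaces as a data conversion exception, which the dry-run flow in H2Sink +// catches in order to locate the offending value.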
+ +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; +import org.finos.legend.engine.persistence.components.optimizer.Optimizer; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.CastFunction; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class TryCastFunctionVisitor implements LogicalPlanVisitor<TryCastFunction> +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, TryCastFunction current, VisitorContext context) + { + DataType dataType = new H2DataTypeMapping().getDataType(current.type()); + + CastFunction castFunction = new CastFunction(dataType, context.quoteIdentifier()); + for (Optimizer optimizer : context.optimizers()) + { + castFunction = (CastFunction) optimizer.optimize(castFunction); + } + prev.push(castFunction); + + List<LogicalPlanNode> logicalPlanNodeList = new ArrayList<>(); + logicalPlanNodeList.add(current.field()); + + return new VisitorResult(castFunction, logicalPlanNodeList); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/CastFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/CastFunction.java new file mode 100644 index 00000000000..b636e658ca4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/CastFunction.java @@ -0,0 +1,69 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
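+ +// SQL-DOM node that renders CAST(column AS data-type); the target data type is fixed at +// construction while the column operand arrives later via push(...) during plan traversal.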
+ +package org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class CastFunction extends Value +{ + private Value column; + private DataType dataType; + + public CastFunction(DataType dataType, String quoteIdentifier) + { + super(quoteIdentifier); + this.dataType = dataType; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append(Clause.CAST); + builder.append(OPEN_PARENTHESIS); + column.genSqlWithoutAlias(builder); + builder.append(WHITE_SPACE); + builder.append(Clause.AS); + builder.append(WHITE_SPACE); + dataType.genSql(builder); + builder.append(CLOSING_PARENTHESIS); + } + + @Override + public void push(Object node) + { + if (node instanceof Value) + { + column = (Value) node; + } + else if (node instanceof DataType) + { + dataType = (DataType) node; + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java index bd1b5a5fcea..869c8d3fb06 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java @@ -1497,6 +1497,34 @@ public static Dataset createDatasetWithUpdatedField(Dataset dataset, Field field return dataset.withSchema(dataset.schema().withFields(newFields)); } + public static void assertEquals(List<Map<String, Object>> expectedList, List<Map<String, Object>> actualList) + { + if (expectedList.size() != actualList.size()) + { + Assertions.fail("Size of expected List does not match actual List"); + } + + for (int i = 0; i < actualList.size(); i++) + { + Map<String, Object> expected = expectedList.get(i); + Map<String, Object> actual = actualList.get(i); + for (Map.Entry<String, Object> entry : expected.entrySet()) + { + Object actualObj = actual.get(entry.getKey()); + Object expectedObj = entry.getValue(); + if (expectedObj == null && actualObj != null) + { + Assertions.fail(String.format("Values mismatch.
key: %s, actual value: %s, expected value: %s", entry.getKey(), actualObj, expectedObj)); + } + if (expectedObj != null && !expectedObj.toString().equals(actualObj.toString())) + { + Assertions.fail(String.format("Values mismatch. key: %s, actual value: %s, expected value: %s", entry.getKey(), actualObj, expectedObj)); + } + + } + } + } + private static List readCsvData(String path) throws IOException { List lines = new ArrayList<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index d2db5bcf01e..cf42fe2b8cb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -35,12 +35,18 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.CaseConversion; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.DryRunResult; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; import org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -50,6 +56,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; @@ -72,6 +79,7 @@ public class BulkLoadTest extends BaseTest private static final String COL_STRING = "col_string"; private static final String COL_DECIMAL = "col_decimal"; private static final String COL_DATETIME = "col_datetime"; + private static final String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; private static Field col1 = Field.builder() .name(COL_INT) @@ -89,6 +97,16 @@ public class BulkLoadTest extends BaseTest .name(COL_DATETIME) .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) .build(); + private static Field 
col2NonNullable = Field.builder() + .name(COL_STRING) + .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) + .nullable(false) + .build(); + private static Field col3NonNullable = Field.builder() + .name(COL_DECIMAL) + .type(FieldType.of(DataType.DECIMAL, 5, 2)) + .nullable(false) + .build(); protected final ZonedDateTime fixedZonedDateTime_2000_01_01 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); protected final Clock fixedClock_2000_01_01 = Clock.fixed(fixedZonedDateTime_2000_01_01.toInstant(), ZoneOffset.UTC); @@ -737,6 +755,502 @@ public void testBulkLoadNotCsvFile() } } + @Test + public void testBulkLoadDryRunSuccess() + { + String filePath = "src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER,\"append_time\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' FROM CSVREAD('src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv'," + + "'col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\"" + + "(\"col_int\" VARCHAR,\"col_string\" VARCHAR,\"col_decimal\" VARCHAR,\"col_datetime\" VARCHAR,\"legend_persistence_file\" 
VARCHAR,\"legend_persistence_row_number\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"legend_persistence_file\", \"legend_persistence_row_number\") " + + "SELECT CONVERT(\"col_int\",VARCHAR),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",VARCHAR),CONVERT(\"col_datetime\",VARCHAR)," + + "'src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_decimal\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_decimal\" AS DECIMAL(5,2)) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertNull(operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE)); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, 
operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.empty()); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + Assertions.assertEquals(dryRunResult.status(), IngestStatus.SUCCEEDED); + Assertions.assertTrue(dryRunResult.errorRecords().isEmpty()); + } + + @Test + public void testBulkLoadDryRunFailure() + { + String filePath = "src/test/resources/data/bulk-load/input/bad_file.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2NonNullable, col3NonNullable, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER,\"col_string\" VARCHAR NOT NULL,\"col_decimal\" DECIMAL(5,2) NOT NULL,\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER,\"append_time\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv'," + + "'col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\"" + + "(\"col_int\" 
VARCHAR,\"col_string\" VARCHAR,\"col_decimal\" VARCHAR,\"col_datetime\" VARCHAR,\"legend_persistence_file\" VARCHAR,\"legend_persistence_row_number\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"legend_persistence_file\", \"legend_persistence_row_number\") " + + "SELECT CONVERT(\"col_int\",VARCHAR),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",VARCHAR),CONVERT(\"col_datetime\",VARCHAR)," + + "'src/test/resources/data/bulk-load/input/bad_file.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + String expectedDryRunNullValidationSql = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (main_validation_lp_yosulf.\"col_string\" IS NULL) OR (main_validation_lp_yosulf.\"col_decimal\" IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_decimal\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_decimal\" AS DECIMAL(5,2)) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, 
operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.empty()); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + List expectedErrorRecords = Arrays.asList(DataError.builder() + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) + .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") + .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name(), 1L)) + .build(), DataError.builder() + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) + .errorRecord("{\"col_int\":\"2\",\"col_decimal\":\"NaN\",\"col_string\":null,\"col_datetime\":\"2022-01-12 00:00:00.0\"}") + .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name(), 2L)) + .build(), DataError.builder() + .errorCategory(ErrorCategory.TYPE_CONVERSION) + .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") + .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col1.name(), 1L)) + .build(), DataError.builder() + .errorCategory(ErrorCategory.TYPE_CONVERSION) + .errorRecord("{\"col_int\":\"??\",\"col_decimal\":null,\"col_string\":\"Andy\",\"col_datetime\":\"2022-01-99 00:00:00.0\"}") + .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col4.name(), 1L)) + .build(), DataError.builder() + .errorCategory(ErrorCategory.TYPE_CONVERSION) + .errorRecord("{\"col_int\":\"2\",\"col_decimal\":\"NaN\",\"col_string\":null,\"col_datetime\":\"2022-01-12 00:00:00.0\"}") + .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col3.name(), 2L)) + .build()); + + Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); + Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); + } + + @Test + public void testBulkLoadDryRunFailureWithSampleRowCountWithUpperCase() + { + String filePath = "src/test/resources/data/bulk-load/input/bad_file.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + 
.digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2NonNullable, col3NonNullable, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) + .sampleRowCount(3) + .caseConversion(CaseConversion.TO_UPPER) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"MAIN\"" + + "(\"COL_INT\" INTEGER,\"COL_STRING\" VARCHAR NOT NULL,\"COL_DECIMAL\" DECIMAL(5,2) NOT NULL,\"COL_DATETIME\" TIMESTAMP,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN\" " + + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"BATCH_ID\", \"APPEND_TIME\") " + + "SELECT CONVERT(\"COL_INT\",INTEGER),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",DECIMAL(5,2)),CONVERT(\"COL_DATETIME\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as my_alias WHERE my_alias.\"BATCH_ID\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\"" + + "(\"COL_INT\" VARCHAR,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" VARCHAR,\"COL_DATETIME\" VARCHAR,\"LEGEND_PERSISTENCE_FILE\" VARCHAR,\"LEGEND_PERSISTENCE_ROW_NUMBER\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" " + + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"LEGEND_PERSISTENCE_FILE\", \"LEGEND_PERSISTENCE_ROW_NUMBER\") " + + "SELECT CONVERT(\"COL_INT\",VARCHAR),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",VARCHAR),CONVERT(\"COL_DATETIME\",VARCHAR),'src/test/resources/data/bulk-load/input/bad_file.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/bad_file.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; + + String 
expectedDryRunNullValidationSql = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (MAIN_validation_lp_yosulf.\"COL_STRING\" IS NULL) OR (MAIN_validation_lp_yosulf.\"COL_DECIMAL\" IS NULL) LIMIT 3"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_INT\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_INT\" AS INTEGER) IS NULL) LIMIT 3"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_DECIMAL\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_DECIMAL\" AS DECIMAL(5,2)) IS NULL) LIMIT 3"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT MAIN_validation_lp_yosulf.\"COL_INT\",MAIN_validation_lp_yosulf.\"COL_STRING\",MAIN_validation_lp_yosulf.\"COL_DECIMAL\",MAIN_validation_lp_yosulf.\"COL_DATETIME\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MAIN_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + + "FROM \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\" as MAIN_validation_lp_yosulf " + + "WHERE (NOT (MAIN_validation_lp_yosulf.\"COL_DATETIME\" IS NULL)) AND (CAST(MAIN_validation_lp_yosulf.\"COL_DATETIME\" AS TIMESTAMP) IS NULL) LIMIT 3"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"MAIN_VALIDATION_LP_YOSULF\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, 
operations.dryRunPostCleanupSql().get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER, 3); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + List expectedErrorRecords = Arrays.asList(DataError.builder() + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) + .errorRecord("{\"COL_STRING\":\"Andy\",\"COL_DATETIME\":\"2022-01-99 00:00:00.0\",\"COL_INT\":\"??\",\"COL_DECIMAL\":null}") + .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col3NonNullable.name().toUpperCase(), 1L)) + .build(), DataError.builder() + .errorCategory(ErrorCategory.CHECK_NULL_CONSTRAINT) + .errorRecord("{\"COL_STRING\":null,\"COL_DATETIME\":\"2022-01-12 00:00:00.0\",\"COL_INT\":\"2\",\"COL_DECIMAL\":\"NaN\"}") + .errorMessage("Null values found in non-nullable column") + .putAllErrorDetails(buildErrorDetails(filePath, col2NonNullable.name().toUpperCase(), 2L)) + .build(), DataError.builder() + .errorCategory(ErrorCategory.TYPE_CONVERSION) + .errorRecord("{\"COL_STRING\":\"Andy\",\"COL_DATETIME\":\"2022-01-99 00:00:00.0\",\"COL_INT\":\"??\",\"COL_DECIMAL\":null}") + .errorMessage("Unable to type cast column") + .putAllErrorDetails(buildErrorDetails(filePath, col1.name().toUpperCase(), 1L)) + .build()); + + Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); + Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); + } + + @Test + public void testBulkLoadDryRunFailureWithFileNotFound() + { + String filePath = "src/test/resources/data/bulk-load/input/non_existent_file.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2NonNullable, col3NonNullable, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .batchIdPattern("{NEXT_BATCH_ID_PATTERN}") + .ingestRunId(ingestRunId) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER,\"col_string\" VARCHAR NOT NULL,\"col_decimal\" DECIMAL(5,2) NOT NULL,\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER,\"append_time\" 
TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' FROM CSVREAD('src/test/resources/data/bulk-load/input/non_existent_file.csv'," + + "'col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"batch_id\" = {NEXT_BATCH_ID_PATTERN}", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\"" + + "(\"col_int\" VARCHAR,\"col_string\" VARCHAR,\"col_decimal\" VARCHAR,\"col_datetime\" VARCHAR,\"legend_persistence_file\" VARCHAR,\"legend_persistence_row_number\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "INSERT INTO \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"legend_persistence_file\", \"legend_persistence_row_number\") " + + "SELECT CONVERT(\"col_int\",VARCHAR),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",VARCHAR),CONVERT(\"col_datetime\",VARCHAR)," + + "'src/test/resources/data/bulk-load/input/non_existent_file.csv',ROW_NUMBER() OVER () " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/non_existent_file.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + String expectedDryRunNullValidationSql = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (main_validation_lp_yosulf.\"col_string\" IS NULL) OR (main_validation_lp_yosulf.\"col_decimal\" IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_int\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_int\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_decimal\" IS 
NULL)) AND (CAST(main_validation_lp_yosulf.\"col_decimal\" AS DECIMAL(5,2)) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql3 = "SELECT main_validation_lp_yosulf.\"col_int\",main_validation_lp_yosulf.\"col_string\",main_validation_lp_yosulf.\"col_decimal\",main_validation_lp_yosulf.\"col_datetime\",main_validation_lp_yosulf.\"legend_persistence_file\",main_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\" as main_validation_lp_yosulf " + + "WHERE (NOT (main_validation_lp_yosulf.\"col_datetime\" IS NULL)) AND (CAST(main_validation_lp_yosulf.\"col_datetime\" AS TIMESTAMP) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"TEST_DB\".\"TEST\".\"main_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(2).getTwo()); + Assertions.assertEquals(3, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.empty()); + ingestor.initExecutor(JdbcConnection.of(h2Sink.connection())); + ingestor.initDatasets(datasets); + DryRunResult dryRunResult = ingestor.dryRun(); + + List expectedErrorRecords = Arrays.asList(DataError.builder() + .errorCategory(ErrorCategory.FILE_NOT_FOUND) + .errorMessage("File not found in specified location") + .putAllErrorDetails(buildErrorDetails(filePath)) + .build()); + + Assertions.assertEquals(IngestStatus.FAILED, dryRunResult.status()); + Assertions.assertEquals(new HashSet<>(expectedErrorRecords), new HashSet<>(dryRunResult.errorRecords())); + } + + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, int sampleRowCount) + { + return RelationalIngestor.builder() + .ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .executionTimestampClock(executionTimestampClock) + .cleanupStagingData(options.cleanupStagingData()) + .collectStatistics(options.collectStatistics()) + .enableConcurrentSafety(true) + .caseConversion(caseConversion) + .sampleRowCount(sampleRowCount) + .build(); + } + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, Optional eventId) { return 
RelationalIngestor.builder() @@ -827,4 +1341,19 @@ private void verifyBulkLoadMetadataForUpperCase(Map appendMetada } } + private Map<String, Object> buildErrorDetails(String fileName, String columnName, Long recordNumber) + { + Map<String, Object> errorDetails = new HashMap<>(); + errorDetails.put(DataError.FILE_NAME, fileName); + errorDetails.put(DataError.COLUMN_NAME, columnName); + errorDetails.put(DataError.RECORD_NUMBER, recordNumber); + return errorDetails; + } + + private Map<String, Object> buildErrorDetails(String fileName) + { + Map<String, Object> errorDetails = new HashMap<>(); + errorDetails.put(DataError.FILE_NAME, fileName); + return errorDetails; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index d60a6fa94bc..558c51a70f5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -552,6 +552,7 @@ void testAppendOnlyWithUDFDigestGenerationWithFieldsToExclude() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId("075605e3-bada-47d7-9ae9-7138f392fe22") .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -568,7 +569,7 @@ void testAppendOnlyWithUDFDigestGenerationWithFieldsToExclude() throws Exception "LAKEHOUSE_MD5(ARRAY['id','name','income','start_time','expiry_date'],ARRAY[CONVERT(staging.\"id\",VARCHAR),CONVERT(staging.\"name\",VARCHAR),CONVERT(staging.\"income\",VARCHAR),CONVERT(staging.\"start_time\",VARCHAR),CONVERT(staging.\"expiry_date\",VARCHAR)])," + "'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"TEST\".\"staging_legend_persistence_temp_staging\" as staging WHERE (staging.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (staging.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + "FROM \"TEST\".\"staging_temp_staging_lp_yosulf\" as staging WHERE (staging.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (staging.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java index 8cda4be62fe..26859a93fbb 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.versioning; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; @@ -32,19 +34,22 @@ import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; +import org.finos.legend.engine.persistence.components.relational.exception.DataQualityException; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.finos.legend.engine.persistence.components.util.TableNameGenUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.io.IOException; -import java.util.List; -import java.util.Map; +import java.util.*; import static org.finos.legend.engine.persistence.components.TestUtils.*; import static org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract.DATA_SPLIT; -import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_STAGING_DATASET_BASE_NAME; +import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_STAGING_DATASET_QUALIFIER; public class TestDedupAndVersioning extends BaseTest { @@ -102,8 +107,6 @@ public class TestDedupAndVersioning extends BaseTest .addFields(batch) .build(); - private static final String tempStagingTableName = stagingTableName + "_" + TEMP_STAGING_DATASET_BASE_NAME; - String[] schemaWithCount = new String[]{idName, nameName, incomeName, expiryDateName, digestName, "legend_persistence_count"}; String[] schemaWithVersion = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName}; String[] schemaWithVersionAndCount = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName, "legend_persistence_count"}; @@ -123,9 +126,9 @@ void testNoDedupNoVersioning() .auditing(NoAuditing.builder().build()) .build(); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); } // Scenario 2 @@ -140,9 +143,9 @@ void testNoDedupMaxVersioningDoNotPerform() 
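Throughout these scenarios, the temp staging table is resolved per ingest run: the fixed "_legend_persistence_temp_staging" suffix is gone, and every lookup goes through TableNameGenUtils with the run id (the "lp_yosulf" token in the expected names comes from the fixed ingestRunId these tests pin). A minimal sketch of that resolution, using only the calls visible in this patch; the class and method names are illustrative:

import org.finos.legend.engine.persistence.components.util.TableNameGenUtils;
import static org.finos.legend.engine.persistence.components.util.TableNameGenUtils.TEMP_STAGING_DATASET_QUALIFIER;

public class TempStagingNameSketch
{
    // Resolves the run-scoped temp staging table name, e.g. "staging" ->
    // "staging_temp_staging_lp_yosulf" for the run id pinned by these tests.
    // How generateTableName derives the trailing token from the run id is
    // internal to TableNameGenUtils and is not assumed here.
    public static String resolve(String stagingTableName, String ingestRunId)
    {
        return TableNameGenUtils.generateTableName(stagingTableName, TEMP_STAGING_DATASET_QUALIFIER, ingestRunId);
    }
}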
.versioningStrategy(MaxVersionStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) .build(); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); } // Scenario 3 @@ -163,20 +166,27 @@ void testNoDedupMaxVersioning() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_max_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersion); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersion, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -194,9 +204,9 @@ void testNoDedupAllVersioningDoNotPerform() .versioningStrategy(AllVersionsStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) .build(); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); } // Scenario 5 @@ -219,20 +229,27 @@ void testNoDedupAllVersion() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_all_version.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersionAndDataSplit); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndDataSplit, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch 
(DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -252,9 +269,9 @@ void testFilterDupsNoVersioning() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data1_filter_dups_no_versioning.csv"; loadDataIntoStagingTableWithoutVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithCount); + verifyResults(expectedDataPath, schemaWithCount, ingestRunId); } // Scenario 7 @@ -275,8 +292,8 @@ void testFilterDupsMaxVersionDoNotPerform() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersionAndCount); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); } @@ -298,20 +315,27 @@ void testFilterDupsMaxVersion() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_max_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); - verifyResults(expectedDataPath, schemaWithVersionAndCount); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -335,9 +359,9 @@ void testFilterDupsAllVersionDoNotPerform() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = 
performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); } // Scenario 10 @@ -360,21 +384,28 @@ void testFilterDupsAllVersion() throws Exception String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_all_version.csv"; loadDataIntoStagingTableWithVersion(srcDataPath); - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit); + verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit, ingestRunId); // Data error scenario, should throw error String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + Map row1 = new HashMap<>(); + row1.put("name", "Cathy"); + row1.put("id", 3); + row1.put("version", 1); + + DataError dataError = buildDataError(ErrorCategory.DATA_VERSION_ERROR, row1, buildErrorDetailsMap("num_data_version_errors", 2L)); + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -395,12 +426,23 @@ void testFailOnDupsNoVersioning() throws Exception try { - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + Map row1 = new HashMap<>(); + row1.put("name", "Andy"); + row1.put("id", 1); + + Map row2 = new HashMap<>(); + row2.put("name", "Becky"); + row2.put("id", 2); + + DataError dataError1 = buildDataError(ErrorCategory.DUPLICATES, row1, buildErrorDetailsMap("num_duplicates", 3)); + DataError dataError2 = buildDataError(ErrorCategory.DUPLICATES, row2, buildErrorDetailsMap("num_duplicates", 2)); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError1, dataError2), e.getDataErrors()); } } @@ -423,9 +465,9 @@ void testFailOnDupsMaxVersionDoNotPerform() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Duplicates scenario, should throw error @@ -433,12 +475,18 @@ void testFailOnDupsMaxVersionDoNotPerform() throws Exception 
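The failure scenarios no longer assert on a bare exception message; they compare structured DataError entries carried by DataQualityException. A hedged sketch of the consuming pattern (getMessage() and getDataErrors() appear verbatim in this patch; the errorCategory(), errorRecord(), and errorDetails() accessors are assumed from the immutables-style builder used elsewhere in the diff):

import java.util.List;
import org.finos.legend.engine.persistence.components.relational.api.DataError;
import org.finos.legend.engine.persistence.components.relational.exception.DataQualityException;

public class DataQualityErrorReporting
{
    // Prints every structured error from a failed dedup/versioning batch.
    public static void report(DataQualityException e)
    {
        System.err.println(e.getMessage());
        List<DataError> errors = e.getDataErrors();
        for (DataError error : errors)
        {
            // errorRecord() is the offending row as JSON, e.g. {"name":"Becky","id":2};
            // errorDetails() carries counters such as num_duplicates. These accessor
            // names are assumptions, not confirmed by this patch.
            System.err.println(error.errorCategory() + ": " + error.errorRecord() + " " + error.errorDetails());
        }
    }
}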
loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, buildErrorDetailsMap("num_duplicates", 2)); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -461,9 +509,9 @@ void testFailOnDupsMaxVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_max_versioin.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Duplicates scenario, should throw error @@ -471,12 +519,18 @@ void testFailOnDupsMaxVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, buildErrorDetailsMap("num_duplicates", 2)); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -502,9 +556,9 @@ void testFailOnDupsAllVersionDoNotPerform() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionAndCount); + verifyResults(expectedDataPath, schemaWithVersionAndCount, ingestRunId); // Duplicates scenario, should throw error @@ -512,12 +566,18 @@ void testFailOnDupsAllVersionDoNotPerform() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, 
buildErrorDetailsMap("num_duplicates", 2)); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -542,9 +602,9 @@ void testFailOnDupsAllVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath1); String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; - performDedupAndVersioining(datasets, ingestMode); + String ingestRunId = performDedupAndVersioining(datasets, ingestMode); // Validate tempTableExists - verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit); + verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit, ingestRunId); // Duplicates scenario, should throw error @@ -552,12 +612,18 @@ void testFailOnDupsAllVersion() throws Exception loadDataIntoStagingTableWithVersion(srcDataPath2); try { - performDedupAndVersioining(datasets, ingestMode); + ingestRunId = performDedupAndVersioining(datasets, ingestMode); Assertions.fail("Should not succeed"); } - catch (Exception e) + catch (DataQualityException e) { - Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + Map row = new HashMap<>(); + row.put("name", "Becky"); + row.put("id", 2); + + DataError dataError = buildDataError(ErrorCategory.DUPLICATES, row, buildErrorDetailsMap("num_duplicates", 2)); + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + Assertions.assertEquals(Arrays.asList(dataError), e.getDataErrors()); } } @@ -571,11 +637,11 @@ public static DatasetDefinition getStagingTableWithoutVersion() .build(); } - private Dataset getTempStagingDataset() + private Dataset getTempStagingDataset(String ingestRunId) { return DatasetReferenceImpl.builder() .group(testSchemaName) - .name(tempStagingTableName) + .name(getTempStagingTableName(ingestRunId)) .build(); } @@ -612,7 +678,7 @@ public static void createStagingTableWithVersion() h2Sink.executeStatement(createSql); } - private static void performDedupAndVersioining(Datasets datasets, IngestMode ingestMode) + private static String performDedupAndVersioining(Datasets datasets, IngestMode ingestMode) { RelationalIngestor ingestor = RelationalIngestor.builder() .ingestMode(ingestMode) @@ -623,6 +689,7 @@ private static void performDedupAndVersioining(Datasets datasets, IngestMode ing ingestor.initDatasets(datasets); ingestor.create(); ingestor.dedupAndVersion(); + return ingestor.getIngestRunId(); } public static void loadDataIntoStagingTableWithoutVersion(String path) throws Exception @@ -655,10 +722,33 @@ public static void loadDataIntoStagingTableWithVersionAndBatch(String path) thro h2Sink.executeStatement(loadSql); } - private void verifyResults(String expectedDataPath, String [] schema) throws IOException + private void verifyResults(String expectedDataPath, String [] schema, String ingestRunId) throws IOException { - Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset())); - List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", tempStagingTableName)); + Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset(ingestRunId))); + List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", getTempStagingTableName(ingestRunId))); 
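+ // The temp staging table is resolved per ingest run (see getTempStagingTableName
+ // below, which delegates to TableNameGenUtils.generateTableName), so results are
+ // read from the run-scoped table rather than the removed fixed-name constant.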
TestUtils.assertFileAndTableDataEquals(schema, expectedDataPath, tableData); } + + private String getTempStagingTableName(String ingestRunId) + { + return TableNameGenUtils.generateTableName(stagingTableName, TEMP_STAGING_DATASET_QUALIFIER, ingestRunId); + } + + private Map<String, Object> buildErrorDetailsMap(String key, Object value) + { + Map<String, Object> errorDetailsMap = new HashMap<>(); + errorDetailsMap.put(key, value); + return errorDetailsMap; + } + + private DataError buildDataError(ErrorCategory errorCategory, Map<String, Object> row, Map<String, Object> errorDetailsMap) throws JsonProcessingException + { + DataError dataError = DataError.builder() + .errorMessage(errorCategory.getDefaultErrorMessage()) + .errorCategory(errorCategory) + .errorRecord(new ObjectMapper().writeValueAsString(row)) + .putAllErrorDetails(errorDetailsMap) + .build(); + return dataError; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/bad_file.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/bad_file.csv new file mode 100644 index 00000000000..d39a87c98ed --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/bad_file.csv @@ -0,0 +1,3 @@ +??,Andy,,2022-01-99 00:00:00.0 +2,,NaN,2022-01-12 00:00:00.0 +,Success,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv new file mode 100644 index 00000000000..f06cdcc3da8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/good_file_with_edge_case.csv @@ -0,0 +1,3 @@ +1,,5.20,2022-01-11 00:00:00.0 +2,123456789123456789123456789,99.99,2022-01-12 00:00:00.0 +, \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index ab5d15c288d..2585bf392f6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -69,7 +69,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); @@ -86,7 +86,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableIngestQuery, generatorResults.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -107,7 +107,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterEx String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -142,7 +142,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_number`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -162,7 +162,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis 
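The expected strings in these MemSQL hunks keep the data-split bounds as the literals '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}' and '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}', bound per operation via enrichSqlWithDataSplits(sql, dataSplitRanges.get(i)). A minimal sketch of that binding, assuming plain string replacement over explicit bounds (the real helper in the shared test base takes a DataSplitRange and may quote the values differently):

public class DataSplitSqlSketch
{
    // Binds one data-split range into a SQL template carrying the two
    // placeholder literals used throughout these expected strings.
    public static String enrichSqlWithDataSplits(String sql, long lowerBound, long upperBound)
    {
        return sql
            .replace("{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}", String.valueOf(lowerBound))
            .replace("{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}", String.valueOf(upperBound));
    }
}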
Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableIngestQueryWithAdditionalMetadata, operations.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_number` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -183,7 +183,7 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_UPDATE_TIME`, `BATCH_ID`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN') " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS " + "(SELECT * FROM `MYDB`.`MAIN` as sink WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; @@ -200,7 +200,7 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -219,7 +219,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExis String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -254,7 +254,7 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco "(`id`, `name`, `amount`, `biz_date`, `digest`, 
`batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java index 0ac9cceb98b..e8fa3a61038 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java @@ -66,14 +66,14 @@ public class MemsqlTestArtifacts "`batch_id` INTEGER," + "PRIMARY KEY (`id`, `name`))"; - public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`biz_date` DATE," + "`legend_persistence_count` INTEGER)"; - public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -82,7 +82,7 @@ public class MemsqlTestArtifacts "`version` INTEGER," + "`legend_persistence_count` INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -90,7 +90,7 @@ public class MemsqlTestArtifacts "`digest` VARCHAR(256)," + "`legend_persistence_count` INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_temp_staging_lp_yosulf`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ 
-181,7 +181,7 @@ public class MemsqlTestArtifacts "PRIMARY KEY (`id`, `name`, `batch_update_time`))"; public static String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage"; - public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_legend_persistence_temp_staging` as stage"; + public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage"; public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging` CASCADE"; @@ -467,7 +467,7 @@ public class MemsqlTestArtifacts "`delete_indicator` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`))"; - public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`," + @@ -476,13 +476,13 @@ public class MemsqlTestArtifacts "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage) " + "as stage WHERE stage.`legend_persistence_rank` = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + @@ -491,32 +491,32 @@ public class MemsqlTestArtifacts "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " + "WHERE stage.`legend_persistence_rank` = 1)"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, 
`data_split`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage"; public static String dataErrorCheckSqlForBizDateAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; public static String dataErrorCheckSqlForVersionAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version`) as stage"; + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version`) as stage"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; - public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage"; - public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage"; + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `LEGEND_PERSISTENCE_COUNT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT` " + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + @@ -526,5 +526,17 @@ public class MemsqlTestArtifacts "FROM `MYDB`.`STAGING` as stage GROUP BY stage.`ID`, stage.`NAME`, stage.`AMOUNT`, stage.`BIZ_DATE`, stage.`DIGEST`) as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_RANK` = 1)"; public static 
String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; + + public static String dupRowsSql = "SELECT `id`,`name`,`legend_persistence_count` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + + "WHERE stage.`legend_persistence_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; + + public static String dataErrorsSqlWithBizDateVersionUpperCase = "SELECT `ID`,`NAME`,`BIZ_DATE`,COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_ERROR_COUNT` FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` " + + "as stage GROUP BY `ID`, `NAME`, `BIZ_DATE` HAVING `LEGEND_PERSISTENCE_ERROR_COUNT` > 1 LIMIT 20"; + + public static String dataErrorsSql = "SELECT `id`,`name`,`version`,COUNT(DISTINCT(`digest`)) as `legend_persistence_error_count` FROM " + + "`mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, `version` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index 6dc0d9a95f1..abdb2dca087 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -32,10 +32,10 @@ public class NontemporalDeltaTest extends NontemporalDeltaTestCases "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String incomingRecordCountWithSplitsAndDuplicates = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - protected String incomingRecordCountWithSplitsTempTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + protected String incomingRecordCountWithSplitsTempTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND 
(stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String rowsTerminated = "SELECT 0 as `rowsTerminated`"; protected String rowsDeleted = "SELECT 0 as `rowsDeleted`"; @@ -86,7 +86,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe List metaIngestSqlList = operations.metadataIngestSql(); String updateSql = "UPDATE `mydb`.`main` as sink " + - "INNER JOIN `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "INNER JOIN `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) " + "SET sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + @@ -100,7 +100,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; @@ -121,7 +121,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) " + "SET sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + @@ -133,7 +133,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -197,7 +197,7 @@ public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) SET " + "sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + @@ -210,7 +210,7 @@ public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -425,7 +425,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen String updateSql = "UPDATE `mydb`.`main` as sink " + "INNER JOIN " + - "`mydb`.`staging_legend_persistence_temp_staging` as stage " + + 
"`mydb`.`staging_temp_staging_lp_yosulf` as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`) " + "SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`version` = stage.`version`," + "sink.`batch_id` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')"; @@ -433,7 +433,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQuery, preActionsSqlList.get(0)); @@ -524,7 +524,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat String updateSql = "UPDATE `MYDB`.`MAIN` as sink " + "INNER JOIN " + - "`MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "`MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "ON ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`) " + "SET sink.`ID` = stage.`ID`,sink.`NAME` = stage.`NAME`,sink.`AMOUNT` = stage.`AMOUNT`,sink.`BIZ_DATE` = stage.`BIZ_DATE`,sink.`DIGEST` = stage.`DIGEST`,sink.`VERSION` = stage.`VERSION`," + "sink.`BATCH_ID` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')"; @@ -532,7 +532,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN') " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase, preActionsSqlList.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java index 03665971154..4682a936879 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -27,6 +27,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { String rowsDeleted = "SELECT COUNT(*) as `rowsDeleted` FROM `mydb`.`main` as sink"; @@ -64,7 +67,7 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); @@ -81,17 +84,21 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe List milestoningSqlList = operations.ingestSql(); List metaIngestSqlList = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`, `batch_id`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00.000000'," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN') " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage)"; String maxDataErrorCheckSql = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`amount`)) as `legend_persistence_distinct_rows` " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage GROUP BY `id`, `name`, 
`biz_date`) as stage"; + + String dataErrorsSqlWithBizDateVersion = "SELECT `id`,`name`,`biz_date`,COUNT(DISTINCT(`amount`)) as `legend_persistence_error_count` " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + + "GROUP BY `id`, `name`, `biz_date` HAVING `legend_persistence_error_count` > 1 LIMIT 20"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSqlList.get(1)); @@ -103,8 +110,10 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); // Stats verifyStats(operations, "staging"); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 2f8df95c9c9..e01fe7b8838 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -23,7 +23,8 @@ import java.util.List; -import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.MAX_DATA_ERRORS; public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @@ -120,7 +121,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE " + 
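
The assertions above distinguish duplicate rows (the same record staged more than once, legend_persistence_count > 1) from data errors (one primary key and version associated with more than one distinct digest). A self-contained illustration of the data-error rule over in-memory rows; the sample values are invented for the example:

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    class DataErrorRuleSketch
    {
        public static void main(String[] args)
        {
            // Columns: id, name, biz_date (the version field), digest
            List<String[]> stagingRows = Arrays.asList(
                new String[] {"1", "A", "2024-01-01", "d1"},
                new String[] {"1", "A", "2024-01-01", "d2"},  // same key + version, different digest -> data error
                new String[] {"2", "B", "2024-01-01", "d3"});

            // Mirrors: SELECT id, name, biz_date, COUNT(DISTINCT digest) ... GROUP BY id, name, biz_date
            Map<String, Set<String>> digestsByKey = new LinkedHashMap<>();
            for (String[] row : stagingRows)
            {
                String key = row[0] + "|" + row[1] + "|" + row[2];
                digestsByKey.computeIfAbsent(key, k -> new HashSet<>()).add(row[3]);
            }

            // Mirrors: HAVING legend_persistence_error_count > 1 LIMIT 20
            digestsByKey.entrySet().stream()
                .filter(e -> e.getValue().size() > 1)
                .limit(20)
                .forEach(e -> System.out.println(e.getKey() + " -> " + e.getValue().size() + " distinct digests"));
            // Prints: 1|A|2024-01-01 -> 2 distinct digests
        }
    }

The generated SQL applies the same HAVING ... > 1 filter and row cap inside the database instead of in memory.
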
"(sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -128,7 +129,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResu "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -155,7 +156,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -163,7 +164,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ -181,7 +182,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List sink.`version`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE " + - "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -371,7 +372,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + @@ -386,6 +387,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -401,14 +403,14 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE " + - "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -416,7 +418,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); - String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_temp_staging_lp_yosulf` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + @@ -431,6 +433,7 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(Ge Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, 
operations.deduplicationAndVersioningSql().get(0)); Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSql, operations.deduplicationAndVersioningErrorChecksSql().get(DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -483,7 +486,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(stage.`version` > sink.`version`)))"; @@ -492,7 +495,7 @@ public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(Gene "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999 " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -514,14 +517,14 @@ public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutSta String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink " + "SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1 WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + - "(EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + + "(EXISTS (SELECT * FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE ((sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`)))"; String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + "(stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = 
stage.`NAME`)))))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 3bedbebbb25..726ff04cca4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -74,7 +74,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -83,7 +83,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -101,7 +101,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) as `rowsInserted`"; @@ -195,7 +195,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -203,7 +203,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java index ec8d442fc5e..87dbd91b175 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java @@ -72,7 +72,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -80,7 +80,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " 
+ + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + @@ -98,7 +98,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000'"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000') as `rowsInserted`"; @@ -154,7 +154,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -162,7 +162,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND (sink.`digest` = stage.`digest`) " + @@ -181,7 +181,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, 
`batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); @@ -89,7 +91,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(Generat Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(2)); Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(MemsqlTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 2d66fb08abf..5600d50fe5b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; @@ -69,19 +69,19 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map 
deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -93,7 +93,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -120,12 +121,12 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = " + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE " + "UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00.000000' 
WHERE " + - "(sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE " + + "(sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE " + "((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + @@ -133,7 +134,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink " + + "FROM `MYDB`.`STAGING_TEMP_STAGING_LP_YOSULF` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink " + "WHERE sink.`BATCH_ID_OUT` = 999999999)))"; String expectedMetadataTableIngestQueryWithAdditionalMetadataWithBatchSuccessValueWithUpperCase = "INSERT INTO BATCH_METADATA " + @@ -147,7 +148,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSqlWithBizDateVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.DATA_ERROR_ROWS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index ab72f14d23e..8a3384ab0b8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import 
org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; @@ -24,6 +24,9 @@ import java.util.List; import java.util.Map; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DATA_ERROR_ROWS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorSqlType.DUPLICATE_ROWS; + public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { @@ -70,20 +73,20 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + "AND (NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(SELECT * FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + - "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "FROM `mydb`.`staging_temp_staging_lp_yosulf` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '9999-12-31 23:59:59')))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); @@ -97,8 +100,10 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DUPLICATES)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorSqlType.MAX_DATA_ERRORS)); + 
Assertions.assertEquals(MemsqlTestArtifacts.dupRowsSql, deduplicationAndVersioningErrorChecksSql.get(DUPLICATE_ROWS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorsSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DATA_ERROR_ROWS)); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml index e7f2a018026..00e48713580 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/pom.xml @@ -61,6 +61,21 @@ + + + org.eclipse.collections + eclipse-collections-api + + + + + + org.apache.commons + commons-csv + ${commons-csv.version} + + + org.slf4j diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 040d23b084a..565010e49a6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -16,7 +16,11 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVRecord; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; @@ -35,18 +39,27 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Show; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import 
org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; +import org.finos.legend.engine.persistence.components.relational.api.DataError; +import org.finos.legend.engine.persistence.components.relational.api.ErrorCategory; import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; +import org.finos.legend.engine.persistence.components.relational.api.ApiUtils; import org.finos.legend.engine.persistence.components.relational.executor.RelationalExecutor; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; import org.finos.legend.engine.persistence.components.relational.snowflake.optmizer.LowerCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.snowflake.optmizer.UpperCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.SnowflakeDataTypeMapping; @@ -54,6 +67,8 @@ import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.AlterVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.BatchEndTimestampVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.ClusterKeyVisitor; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.MetadataFileNameFieldVisitor; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.MetadataRowNumberFieldVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.SQLCreateVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.SchemaDefinitionVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.FieldVisitor; @@ -65,16 +80,21 @@ import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.DigestUdfVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesSelectionVisitor; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.TryCastFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.Transformer; import 
org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.PlaceholderValue; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.io.StringReader; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; @@ -82,16 +102,20 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Properties; +import java.util.Queue; import java.util.Set; -import java.util.Objects; import java.util.ArrayList; +import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.NULL_VALUE; +import static org.finos.legend.engine.persistence.components.util.ValidationCategory.TYPE_CONVERSION; public class SnowflakeSink extends AnsiSqlSink { @@ -110,6 +134,20 @@ public class SnowflakeSink extends AnsiSqlSink private static final String ERRORS_SEEN = "errors_seen"; private static final String FIRST_ERROR = "first_error"; private static final String FIRST_ERROR_COLUMN_NAME = "first_error_column_name"; + private static final String ERROR = "ERROR"; + protected static final String FILE_WITH_ERROR = "FILE"; + protected static final String ROW_NUMBER = "ROW_NUMBER"; + private static final String LINE = "LINE"; + private static final String CHARACTER = "CHARACTER"; + private static final String CATEGORY = "CATEGORY"; + private static final String COLUMN_NAME = "COLUMN_NAME"; + private static final String REJECTED_RECORD = "REJECTED_RECORD"; + private static final String FIELD_DELIMITER = "FIELD_DELIMITER"; + private static final String ESCAPE = "ESCAPE"; + private static final String FIELD_OPTIONALLY_ENCLOSED_BY = "FIELD_OPTIONALLY_ENCLOSED_BY"; + private static final String CATEGORY_CONVERSION = "conversion"; + private static final String CATEGORY_CHECK_CONSTRAINT = "check_constraint"; + private static final String CATEGORY_OTHER = "other"; static { @@ -119,6 +157,7 @@ public class SnowflakeSink extends AnsiSqlSink capabilities.add(Capability.IMPLICIT_DATA_TYPE_CONVERSION); capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.TRANSFORM_WHILE_COPY); + capabilities.add(Capability.DRY_RUN); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); @@ -136,6 +175,9 @@ public class SnowflakeSink extends AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); logicalPlanVisitorByClass.put(StagedFilesSelection.class, new StagedFilesSelectionVisitor()); logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); + logicalPlanVisitorByClass.put(TryCastFunction.class, new TryCastFunctionVisitor()); + logicalPlanVisitorByClass.put(MetadataFileNameField.class, new MetadataFileNameFieldVisitor()); + logicalPlanVisitorByClass.put(MetadataRowNumberField.class, new MetadataRowNumberFieldVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); @@ -233,6 +275,218 @@ public 
Optional optimizerForCaseConversion(CaseConversion caseConvers } } + public List performDryRun(Datasets datasets, Transformer transformer, Executor executor, + SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, + int sampleRowCount, CaseConversion caseConversion) + { + try + { + if (dryRunValidationSqlPlan == null || dryRunValidationSqlPlan.isEmpty()) + { + return performDryRunWithValidationMode(datasets, executor, dryRunSqlPlan, sampleRowCount); + } + else + { + return performDryRunWithValidationQueries(datasets, executor, dryRunSqlPlan, dryRunValidationSqlPlan, sampleRowCount, caseConversion); + } + } + catch (Exception e) + { + return parseSnowflakeExceptions(e); + } + } + + private List parseSnowflakeExceptions(Exception e) + { + String errorMessage = e.getMessage(); + String errorMessageWithoutLineBreaks = ApiUtils.removeLineBreaks(e.getMessage()); + + if (errorMessage.contains("Error parsing")) + { + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.PARSING_ERROR).errorMessage(errorMessageWithoutLineBreaks).build()); + } + + if (errorMessage.contains("file") && errorMessage.contains("was not found")) + { + Optional fileName = ApiUtils.findToken(errorMessage, "file '(.*)' was not found", 1); + Map errorDetails = buildErrorDetails(fileName, Optional.empty(), Optional.empty()); + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.FILE_NOT_FOUND).errorMessage(errorMessageWithoutLineBreaks).putAllErrorDetails(errorDetails).build()); + } + + return Collections.singletonList(DataError.builder().errorCategory(ErrorCategory.UNKNOWN).errorMessage(errorMessageWithoutLineBreaks).build()); + } + + private List performDryRunWithValidationMode(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, int sampleRowCount) + { + List results = executor.executePhysicalPlanAndGetResults(dryRunSqlPlan, sampleRowCount); + List dataErrors = new ArrayList<>(); + + if (!results.isEmpty()) + { + List> resultSets = results.get(0).getData(); + for (Map row : resultSets) + { + Map errorDetails = buildErrorDetails(getString(row, FILE_WITH_ERROR), getString(row, COLUMN_NAME), getLong(row, ROW_NUMBER)); + getLong(row, LINE).ifPresent(line -> errorDetails.put(DataError.LINE_NUMBER, line)); + getLong(row, CHARACTER).ifPresent(characterPos -> errorDetails.put(DataError.CHARACTER_POSITION, characterPos)); + + DataError dataError = DataError.builder() + .errorMessage(getString(row, ERROR).orElseThrow(IllegalStateException::new)) + .errorCategory(parseSnowflakeErrorCategory(row)) + .putAllErrorDetails(errorDetails) + .errorRecord(getString(row, REJECTED_RECORD).map(rejectedRecord -> + { + try + { + return parseSnowflakeRejectedRecord(datasets, rejectedRecord); + } + catch (IOException e) + { + LOGGER.warn("Exception in parsing the record"); + return String.format("{\"%s\" : \"%s\"}", "unparsed_row", rejectedRecord); + } + })) + .build(); + dataErrors.add(dataError); + } + } + return dataErrors; + } + + private ErrorCategory parseSnowflakeErrorCategory(Map row) + { + String snowflakeErrorCategory = getString(row, CATEGORY).orElseThrow(IllegalStateException::new); + String errorMessage = getString(row, ERROR).orElseThrow(IllegalStateException::new); + + if (snowflakeErrorCategory.equals(CATEGORY_CONVERSION)) + { + return ErrorCategory.TYPE_CONVERSION; + } + else if (snowflakeErrorCategory.equals(CATEGORY_CHECK_CONSTRAINT)) + { + if (errorMessage.contains("NULL result in a non-nullable column")) + { + return ErrorCategory.CHECK_NULL_CONSTRAINT; + } 
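+ // Any other check_constraint failure falls back to the generic CHECK_OTHER_CONSTRAINT category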
+ else + { + return ErrorCategory.CHECK_OTHER_CONSTRAINT; + } + } + else if (snowflakeErrorCategory.equals(CATEGORY_OTHER)) + { + if (errorMessage.contains("file") && errorMessage.contains("was not found")) + { + return ErrorCategory.FILE_NOT_FOUND; + } + else + { + return ErrorCategory.UNKNOWN; + } + } + else + { + return ErrorCategory.UNKNOWN; + } + } + + public String parseSnowflakeRejectedRecord(Datasets datasets, String rejectedRecord) throws IOException + { + Map formatOptions = getFormatOptions(datasets); + CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote(null).withEscape(null); + if (formatOptions.containsKey(FIELD_DELIMITER)) + { + csvFormat = csvFormat.withDelimiter(getChar(formatOptions, FIELD_DELIMITER).orElseThrow(IllegalStateException::new)); + } + if (formatOptions.containsKey(ESCAPE)) + { + csvFormat = csvFormat.withEscape(getChar(formatOptions, ESCAPE).orElseThrow(IllegalStateException::new)); + } + if (formatOptions.containsKey(FIELD_OPTIONALLY_ENCLOSED_BY)) + { + csvFormat = csvFormat.withQuote(getChar(formatOptions, FIELD_OPTIONALLY_ENCLOSED_BY).orElseThrow(IllegalStateException::new)); + } + + List allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); + Map errorRecordMap = new HashMap<>(); + + List records = csvFormat.parse(new StringReader(rejectedRecord)).getRecords(); + for (CSVRecord csvRecord : records) + { + for (int i = 0; i < csvRecord.size(); i++) + { + errorRecordMap.put(allFields.get(i), csvRecord.get(i)); + } + } + + return new ObjectMapper().writeValueAsString(errorRecordMap); + } + + private Map getFormatOptions(Datasets datasets) + { + if (!(datasets.stagingDataset() instanceof StagedFilesDataset)) + { + throw new IllegalStateException("StagedFilesDataset expected"); + } + StagedFilesDataset stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); + if (!(stagedFilesDataset.stagedFilesDatasetProperties() instanceof SnowflakeStagedFilesDatasetProperties)) + { + throw new IllegalStateException("SnowflakeStagedFilesDatasetProperties expected"); + } + SnowflakeStagedFilesDatasetProperties snowflakeStagedFilesDatasetProperties = (SnowflakeStagedFilesDatasetProperties) stagedFilesDataset.stagedFilesDatasetProperties(); + if (!snowflakeStagedFilesDatasetProperties.fileFormat().isPresent() || !(snowflakeStagedFilesDatasetProperties.fileFormat().get() instanceof StandardFileFormat)) + { + throw new IllegalStateException("StandardFileFormat expected"); + } + StandardFileFormat standardFileFormat = (StandardFileFormat) snowflakeStagedFilesDatasetProperties.fileFormat().get(); + if (!standardFileFormat.formatType().equals(FileFormatType.CSV)) + { + throw new IllegalStateException("CSV format expected"); + } + return standardFileFormat.formatOptions(); + } + + private List performDryRunWithValidationQueries(Datasets datasets, Executor executor, SqlPlan dryRunSqlPlan, Map, SqlPlan>>> dryRunValidationSqlPlan, int sampleRowCount, CaseConversion caseConversion) + { + executor.executePhysicalPlan(dryRunSqlPlan); + + int dataErrorsTotalCount = 0; + Map> dataErrorsByCategory = new HashMap<>(); + for (ValidationCategory validationCategory : ValidationCategory.values()) + { + dataErrorsByCategory.put(validationCategory, new LinkedList<>()); + } + + List allFields = datasets.stagingDataset().schemaReference().fieldValues().stream().map(FieldValue::fieldName).collect(Collectors.toList()); + + List, SqlPlan>> queriesForNull = dryRunValidationSqlPlan.getOrDefault(NULL_VALUE, new 
ArrayList<>()); + List, SqlPlan>> queriesForDatatype = dryRunValidationSqlPlan.getOrDefault(TYPE_CONVERSION, new ArrayList<>()); + + // Execute queries for null values + dataErrorsTotalCount += findNullValuesDataErrors(executor, queriesForNull, dataErrorsByCategory, allFields, caseConversion); + // Execute queries for datatype conversion + for (Pair, SqlPlan> pair : queriesForDatatype) + { + List results = executor.executePhysicalPlanAndGetResults(pair.getTwo()); + if (!results.isEmpty()) + { + List> resultSets = results.get(0).getData(); + for (Map row : resultSets) + { + // This loop will only be executed once as there is always only one element in the set + for (String column : pair.getOne().stream().map(FieldValue::fieldName).collect(Collectors.toSet())) + { + DataError dataError = constructDataError(allFields, row, TYPE_CONVERSION, column, caseConversion); + dataErrorsByCategory.get(TYPE_CONVERSION).add(dataError); + dataErrorsTotalCount++; + } + } + } + } + + return getDataErrorsWithFairDistributionAcrossCategories(sampleRowCount, dataErrorsTotalCount, dataErrorsByCategory); + } + @Override public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { @@ -247,32 +501,32 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor row: resultSets) { - Object bulkLoadStatus = row.get(BULK_LOAD_STATUS); - Object filePath = row.get(FILE); - if (Objects.nonNull(bulkLoadStatus) && Objects.nonNull(filePath)) + Optional bulkLoadStatus = getString(row, BULK_LOAD_STATUS); + Optional filePath = getString(row, FILE); + if (bulkLoadStatus.isPresent() && filePath.isPresent()) { - if (bulkLoadStatus.equals(LOADED)) + if (bulkLoadStatus.get().equals(LOADED)) { totalFilesLoaded++; } else { // if partially loaded or load failed - dataFilePathsWithErrors.add(filePath.toString()); + dataFilePathsWithErrors.add(filePath.get()); errorMessages.add(getErrorMessage(row)); } } - Object rowsWithError = row.get(ERRORS_SEEN); - if (Objects.nonNull(rowsWithError)) + Optional rowsWithError = getLong(row, ERRORS_SEEN); + if (rowsWithError.isPresent()) { - totalRowsWithError += (Long) row.get(ERRORS_SEEN); + totalRowsWithError += rowsWithError.get(); } - Object rowsLoaded = row.get(ROWS_LOADED); - if (Objects.nonNull(rowsLoaded)) + Optional rowsLoaded = getLong(row, ROWS_LOADED); + if (rowsLoaded.isPresent()) { - totalRowsLoaded += (Long) row.get(ROWS_LOADED); + totalRowsLoaded += rowsLoaded.get(); } } @@ -309,16 +563,11 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor row) { Map errorInfoMap = new HashMap<>(); - Object filePath = row.get(FILE); - Object bulkLoadStatus = row.get(BULK_LOAD_STATUS); - Object errorsSeen = row.get(ERRORS_SEEN); - Object firstError = row.get(FIRST_ERROR); - Object firstErrorColumnName = row.get(FIRST_ERROR_COLUMN_NAME); - errorInfoMap.put(FILE, filePath); - errorInfoMap.put(BULK_LOAD_STATUS, bulkLoadStatus); - errorInfoMap.put(ERRORS_SEEN, errorsSeen); - errorInfoMap.put(FIRST_ERROR, firstError); - errorInfoMap.put(FIRST_ERROR_COLUMN_NAME, firstErrorColumnName); + errorInfoMap.put(FILE, row.get(FILE)); + errorInfoMap.put(BULK_LOAD_STATUS, row.get(BULK_LOAD_STATUS)); + errorInfoMap.put(ERRORS_SEEN, row.get(ERRORS_SEEN)); + errorInfoMap.put(FIRST_ERROR, row.get(FIRST_ERROR)); + errorInfoMap.put(FIRST_ERROR_COLUMN_NAME, row.get(FIRST_ERROR_COLUMN_NAME)); ObjectMapper objectMapper = new ObjectMapper(); try diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java index 751698e6ad2..7ff6bda8d92 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; @@ -36,4 +37,18 @@ public interface SnowflakeStagedFilesDatasetPropertiesAbstract extends StagedFil Optional fileFormat(); Map copyOptions(); + + @Value.Derived + default boolean validationModeSupported() + { + // Validation mode is only supported for CSV + boolean validationModeSupported = false; + if (fileFormat().isPresent() && fileFormat().get() instanceof StandardFileFormat) + { + StandardFileFormat standardFileFormat = (StandardFileFormat) fileFormat().get(); + validationModeSupported = standardFileFormat.formatType().equals(FileFormatType.CSV); + } + + return validationModeSupported; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java index c476b8ec0c7..12f717d070f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import
org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; @@ -38,22 +39,29 @@ public VisitorResult visit(PhysicalPlanNode prev, Copy current, VisitorContext c { SnowflakeStagedFilesDatasetProperties properties = (SnowflakeStagedFilesDatasetProperties) current.stagedFilesDatasetProperties(); CopyStatement copyStatement = new CopyStatement(); - setCopyStatementProperties(properties, copyStatement); + setCopyStatementProperties(properties, copyStatement, current); prev.push(copyStatement); List logicalPlanNodes = new ArrayList<>(); logicalPlanNodes.add(current.sourceDataset()); logicalPlanNodes.add(current.targetDataset()); - logicalPlanNodes.addAll(current.fields()); - + if (!current.fields().isEmpty()) + { + logicalPlanNodes.addAll(current.fields()); + } return new VisitorResult(copyStatement, logicalPlanNodes); } - private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProperties properties, CopyStatement copyStatement) + private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProperties properties, CopyStatement copyStatement, Copy current) { copyStatement.setFilePatterns(properties.filePatterns()); copyStatement.setFilePaths(properties.filePaths()); + if (current.validationMode()) + { + copyStatement.setValidationMode("RETURN_ERRORS"); + } + // Add default option into the map Map copyOptions = new HashMap<>(properties.copyOptions()); if (!copyOptions.containsKey("ON_ERROR") && !copyOptions.containsKey("on_error")) @@ -74,8 +82,13 @@ private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProper else if (format instanceof StandardFileFormat) { StandardFileFormat standardFileFormat = (StandardFileFormat) format; + Map formatOptions = new HashMap<>(standardFileFormat.formatOptions()); + if (current.validationMode() && standardFileFormat.formatType().equals(FileFormatType.CSV)) + { + formatOptions.put("ERROR_ON_COLUMN_COUNT_MISMATCH", false); + } copyStatement.setFileFormatType(standardFileFormat.formatType()); - copyStatement.setFileFormatOptions(standardFileFormat.formatOptions()); + copyStatement.setFileFormatOptions(formatOptions); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataFileNameFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataFileNameFieldVisitor.java new file mode 100644 index 00000000000..d6daac55296 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataFileNameFieldVisitor.java @@ -0,0 +1,32 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataFileNameField; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.MetadataFileNameValue; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataFileNameFieldVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataFileNameField current, VisitorContext context) + { + MetadataFileNameValue fileNameColumn = new MetadataFileNameValue(context.quoteIdentifier()); + prev.push(fileNameColumn); + return new VisitorResult(null); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java new file mode 100644 index 00000000000..5e64bb0f7f7 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/MetadataRowNumberFieldVisitor.java @@ -0,0 +1,47 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
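+
+// Visitor for MetadataRowNumberField: derives the starting row number from the staged file format
+// (AVRO numbering starts at 0, all other formats at 1) and pushes a MetadataRowNumberValue so that
+// generated row numbers are normalized to start from 1.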
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; + +import org.finos.legend.engine.persistence.components.common.FileFormatType; +import org.finos.legend.engine.persistence.components.logicalplan.values.MetadataRowNumberField; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; +import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.MetadataRowNumberValue; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class MetadataRowNumberFieldVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, MetadataRowNumberField current, VisitorContext context) + { + if (!(current.stagedFilesDatasetProperties() instanceof SnowflakeStagedFilesDatasetProperties)) + { + throw new IllegalStateException("Only SnowflakeStagedFilesDatasetProperties are supported for Snowflake Sink"); + } + SnowflakeStagedFilesDatasetProperties datasetProperties = (SnowflakeStagedFilesDatasetProperties) current.stagedFilesDatasetProperties(); + + int startingRowNumber = 1; + if (datasetProperties.fileFormat().isPresent() && datasetProperties.fileFormat().get() instanceof StandardFileFormat) + { + StandardFileFormat standardFileFormat = (StandardFileFormat) datasetProperties.fileFormat().get(); + startingRowNumber = standardFileFormat.formatType().equals(FileFormatType.AVRO) ? 0 : 1; + } + MetadataRowNumberValue rowNumberColumn = new MetadataRowNumberValue(context.quoteIdentifier(), startingRowNumber); + prev.push(rowNumberColumn); + return new VisitorResult(null); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/TryCastFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/TryCastFunctionVisitor.java new file mode 100644 index 00000000000..aabbf31ae6c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/TryCastFunctionVisitor.java @@ -0,0 +1,49 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
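+
+// Visitor for TryCastFunction: resolves the target Snowflake type via SnowflakeDataTypeMapping,
+// applies any registered optimizers (such as the upper/lower case optimizers) and pushes the
+// Snowflake sqldom TryCastFunction, with the cast field as its child node.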
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.values.TryCastFunction; +import org.finos.legend.engine.persistence.components.optimizer.Optimizer; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.SnowflakeDataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class TryCastFunctionVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, TryCastFunction current, VisitorContext context) + { + DataType dataType = new SnowflakeDataTypeMapping().getDataType(current.type()); + + org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.TryCastFunction tryCastFunction = + new org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.TryCastFunction(dataType, context.quoteIdentifier()); + for (Optimizer optimizer : context.optimizers()) + { + tryCastFunction = (org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.TryCastFunction) optimizer.optimize(tryCastFunction); + } + prev.push(tryCastFunction); + + List logicalPlanNodeList = new ArrayList<>(); + logicalPlanNodeList.add(current.field()); + + return new VisitorResult(tryCastFunction, logicalPlanNodeList); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java index 6560bbc0edb..acb5f43a8d2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java @@ -15,9 +15,11 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.statements; import org.finos.legend.engine.persistence.components.common.FileFormatType; +import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.expressions.table.StagedFilesTable; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; +import 
org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.TableLike; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DMLStatement; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.SelectStatement; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Field; @@ -40,7 +42,7 @@ public class CopyStatement implements DMLStatement { private Table table; private final List columns; - private SelectStatement selectStatement; + private TableLike srcTable; private List filePatterns; private List filePaths; private String userDefinedFileFormatName; @@ -48,35 +50,51 @@ public class CopyStatement implements DMLStatement private Map fileFormatOptions; private Map copyOptions; + private String validationMode; + public CopyStatement() { this.columns = new ArrayList<>(); } - public CopyStatement(Table table, List columns, SelectStatement selectStatement) + public CopyStatement(Table table, List columns, TableLike srcTable) { this.table = table; this.columns = columns; - this.selectStatement = selectStatement; + this.srcTable = srcTable; } /* Copy GENERIC PLAN for Snowflake: + + Standard data load + -------------------------------- + COPY INTO [<namespace>.]<table_name> + FROM { internalStage | externalStage | externalLocation } + [ FILES = ( '<file_name>' [ , '<file_name>' ] [ , ... ] ) ] + [ PATTERN = '<regex_pattern>' ] + [ FILE_FORMAT = ( { FORMAT_NAME = '[<namespace>.]<file_format_name>' | + TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] } ) ] + [ copyOptions ] + [ VALIDATION_MODE = RETURN_<n>_ROWS | RETURN_ERRORS | RETURN_ALL_ERRORS ] + + Data load with transformation -------------------------------- - COPY INTO [<namespace>.]<table_name> (COLUMN_LIST) - FROM - ( SELECT [<alias>.]$<file_col_num>[.<element>] [ , [<alias>.]$<file_col_num>[.<element>] ... ] - FROM { internalStage | externalStage } ) - [ FILES = ( '<file_name>' [ , '<file_name>' ] [ , ... ] ) ] - [ PATTERN = '<regex_pattern>' ] - [ FILE_FORMAT = ( { FORMAT_NAME = '[<namespace>.]<file_format_name>' | - TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] } ) ] - [ copyOptions ] + COPY INTO [<namespace>.]<table_name> [ ( <col_name> [ , <col_name> ... ] ) ] + FROM ( SELECT [<alias>.]$<file_col_num>[.<element>] [ , [<alias>.]$<file_col_num>[.<element>] ... ] + FROM { internalStage | externalStage } ) + [ FILES = ( '<file_name>' [ , '<file_name>' ] [ , ...
] ) ] + [ PATTERN = '<regex_pattern>' ] + [ FILE_FORMAT = ( { FORMAT_NAME = '[<namespace>.]<file_format_name>' | + TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] } ) ] + [ copyOptions ] + -------------------------------- */ @Override public void genSql(StringBuilder builder) throws SqlDomException { + boolean dataLoadWithTransformation = srcTable instanceof SelectStatement; validate(); builder.append("COPY INTO "); @@ -101,9 +119,15 @@ public void genSql(StringBuilder builder) throws SqlDomException builder.append(WHITE_SPACE + Clause.FROM.get() + WHITE_SPACE); - builder.append(OPEN_PARENTHESIS); - selectStatement.genSql(builder); - builder.append(CLOSING_PARENTHESIS); + if (dataLoadWithTransformation) + { + builder.append(OPEN_PARENTHESIS); + } + srcTable.genSql(builder); + if (dataLoadWithTransformation) + { + builder.append(CLOSING_PARENTHESIS); + } // File Paths if (filePaths != null && !filePaths.isEmpty()) @@ -139,6 +163,12 @@ else if (fileFormatType != null) builder.append(WHITE_SPACE); addOptions(copyOptions, builder); } + // Add validation mode + if (StringUtils.notEmpty(validationMode)) + { + builder.append(WHITE_SPACE); + builder.append(String.format("VALIDATION_MODE = '%s'", validationMode)); + } } @@ -181,21 +211,30 @@ else if (node instanceof Field) } else if (node instanceof SelectStatement) { - selectStatement = (SelectStatement) node; + srcTable = (SelectStatement) node; + } + else if (node instanceof StagedFilesTable) + { + srcTable = (StagedFilesTable) node; } } void validate() throws SqlDomException { - if (selectStatement == null) + if (srcTable == null) { - throw new SqlDomException("selectStatement is mandatory for Copy Table Command"); + throw new SqlDomException("srcTable is mandatory for Copy Table Command"); } if (table == null) { throw new SqlDomException("table is mandatory for Copy Table Command"); } + + if (StringUtils.notEmpty(validationMode) && srcTable instanceof SelectStatement) + { + throw new SqlDomException("VALIDATION_MODE is not supported for Data load with transformation"); + } } public void setFilePatterns(List filePatterns) @@ -223,8 +262,13 @@ public void setFileFormatOptions(Map fileFormatOptions) this.fileFormatOptions = fileFormatOptions; } + public void setValidationMode(String validationMode) + { + this.validationMode = validationMode; + } + public void setCopyOptions(Map copyOptions) { this.copyOptions = copyOptions; } -} +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataFileNameValue.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataFileNameValue.java new file mode 100644 index 00000000000..0a4924bef45 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataFileNameValue.java @@ -0,0 +1,40 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +public class MetadataFileNameValue extends Value +{ + + public MetadataFileNameValue(String quoteIdentifier) + { + super(quoteIdentifier); + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append("METADATA$FILENAME"); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberValue.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberValue.java new file mode 100644 index 00000000000..18b736d7952 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/MetadataRowNumberValue.java @@ -0,0 +1,52 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
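+
+// Renders Snowflake's METADATA$FILE_ROW_NUMBER pseudo-column, appending a constant offset when
+// the starting row number is not 1 so that emitted row numbers always start from 1.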
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +public class MetadataRowNumberValue extends Value +{ + private int startingRowNumber = 1; + + public MetadataRowNumberValue(String quoteIdentifier) + { + super(quoteIdentifier); + } + + public MetadataRowNumberValue(String quoteIdentifier, int startingRowNumber) + { + super(quoteIdentifier); + this.startingRowNumber = startingRowNumber; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append("METADATA$FILE_ROW_NUMBER"); + if (startingRowNumber != 1) + { + int offset = 1 - startingRowNumber; + builder.append(String.format(" + %d", offset)); // This is to standardize such that row numbers start from 1 + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/TryCastFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/TryCastFunction.java new file mode 100644 index 00000000000..b26b223f6ec --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/TryCastFunction.java @@ -0,0 +1,69 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
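+
+// Renders TRY_CAST(<column> AS <type>); TRY_CAST yields NULL instead of raising an error when the
+// value cannot be converted, which the dry-run type-conversion validation queries rely on to
+// surface bad rows.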
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class TryCastFunction extends Value +{ + private Value column; + private DataType dataType; + + public TryCastFunction(DataType dataType, String quoteIdentifier) + { + super(quoteIdentifier); + this.dataType = dataType; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append(Clause.TRY_CAST); + builder.append(OPEN_PARENTHESIS); + column.genSqlWithoutAlias(builder); + builder.append(WHITE_SPACE); + builder.append(Clause.AS); + builder.append(WHITE_SPACE); + dataType.genSql(builder); + builder.append(CLOSING_PARENTHESIS); + } + + @Override + public void push(Object node) + { + if (node instanceof Value) + { + column = (Value) node; + } + else if (node instanceof DataType) + { + dataType = (DataType) node; + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java index 38bded0371e..03a19a4b8c3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java @@ -52,7 +52,7 @@ public class SnowflakeTestArtifacts "\"BATCH_SOURCE_INFO\" VARIANT," + "\"ADDITIONAL_METADATA\" VARIANT)"; - public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_temp_staging_lp_yosulf\"" + "(\"id\" INTEGER NOT NULL," + "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + @@ -60,7 +60,7 @@ public class SnowflakeTestArtifacts "\"legend_persistence_count\" INTEGER," + "\"data_split\" INTEGER NOT NULL)"; - public static String expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + public static String 
expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_temp_staging_lp_yosulf\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"legend_persistence_count\", \"data_split\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"biz_date\" ASC) as \"data_split\" " + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index b52f4d00b6d..ff771965ad9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -86,6 +86,7 @@ public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisting .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); @@ -100,7 +101,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('name',stage.\"name\",'biz_date',stage.\"biz_date\"))," + "'2000-01-01 00:00:00.000000',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN') " + - "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + "FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), generatorResults.get(0).preActionsSql().get(1)); @@ -116,7 +117,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), generatorResults.get(0).metadataIngestSql().get(0)); // Stats - String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + String incomingRecordCount = "SELECT 
COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index d27da8f8174..1b0b280e95a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.eclipse.collections.api.tuple.Pair; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.common.StatisticName; @@ -28,6 +29,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -35,6 +37,7 @@ import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.UserDefinedFileFormat; +import org.finos.legend.engine.persistence.components.util.ValidationCategory; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -46,12 +49,15 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import static org.finos.legend.engine.persistence.components.common.StatisticName.*; public class BulkLoadTest { private static final String APPEND_TIME = "append_time"; + private static final String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; + private static Field col1 = Field.builder() .name("col_int") @@ -72,6 +78,26 @@ public class BulkLoadTest .columnNumber(5) .build(); + private static Field 
col1NonNullable = Field.builder() + .name("col_int") + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .nullable(false) + .build(); + + private static Field col3NonNullable = Field.builder() + .name("col_bigint") + .type(FieldType.of(DataType.BIGINT, Optional.empty(), Optional.empty())) + .columnNumber(4) + .nullable(false) + .build(); + + private static Field col4NonNullable = Field.builder() + .name("col_variant") + .type(FieldType.of(DataType.VARIANT, Optional.empty(), Optional.empty())) + .columnNumber(5) + .nullable(false) + .build(); + private List filesList = Arrays.asList("/path/xyz/file1.csv", "/path/xyz/file2.csv"); protected final ZonedDateTime fixedZonedDateTime_2000_01_01 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); @@ -110,14 +136,19 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") .batchIdPattern("{NEXT_BATCH_ID}") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); List preActionsSql = operations.preActionsSql(); + List dryRunPreActionsSql = operations.dryRunPreActionsSql(); List ingestSql = operations.ingestSql(); + List dryRunSql = operations.dryRunSql(); + Map, String>>> dryRunValidationSql = operations.dryRunValidationSql(); List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); + List dryRunPostCleanupSql = operations.dryRunPostCleanupSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER,\"col_integer\" INTEGER,\"batch_id\" INTEGER,\"append_time\" DATETIME)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + @@ -132,9 +163,22 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() String expectedMetadataIngestSql = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + "(SELECT 'my_name',{NEXT_BATCH_ID},'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"task123\",\"file_patterns\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + String expectedDryRunPreActionsSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; + String expectedDryRunLoadSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + + "ON_ERROR = 'ABORT_STATEMENT' " + + "VALIDATION_MODE = 'RETURN_ERRORS'"; + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"my_db\".\"my_name_validation_lp_yosulf\""; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); + Assertions.assertEquals(expectedDryRunPreActionsSql, dryRunPreActionsSql.get(0)); + Assertions.assertEquals(expectedDryRunLoadSql, dryRunSql.get(0)); + Assertions.assertTrue(dryRunValidationSql.isEmpty()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, dryRunPostCleanupSql.get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); @@ 
-156,9 +200,9 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .stagedFilesDatasetProperties( SnowflakeStagedFilesDatasetProperties.builder() .location("my_location") - .fileFormat(StandardFileFormat.builder().formatType(FileFormatType.CSV).build()) + .fileFormat(StandardFileFormat.builder().formatType(FileFormatType.AVRO).build()) .addAllFilePaths(filesList).build()) - .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col3, col4)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col3NonNullable, col4NonNullable)).build()) .alias("t") .build(); @@ -175,6 +219,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) .batchSuccessStatusValue("SUCCEEDED") .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -184,20 +229,20 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() List metaIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT,\"col_variant\" VARIANT,\"batch_id\" INTEGER)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT NOT NULL,\"col_variant\" VARIANT NOT NULL,\"batch_id\" INTEGER)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + "(\"col_bigint\", \"col_variant\", \"batch_id\") " + "FROM " + "(SELECT t.$4 as \"col_bigint\",TO_VARIANT(PARSE_JSON(t.$5)) as \"col_variant\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MY_NAME') " + "FROM my_location as t) " + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + - "FILE_FORMAT = (TYPE = 'CSV') " + + "FILE_FORMAT = (TYPE = 'AVRO') " + "ON_ERROR = 'ABORT_STATEMENT'"; String expectedMetaIngestSql = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\", \"additional_metadata\") " + - "(SELECT 'my_name',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MY_NAME')," + - "'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + - "PARSE_JSON('{\"event_id\":\"task123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}')," + - "PARSE_JSON('{\"watermark\":\"my_watermark_value\"}'))"; + "(SELECT 'my_name',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MY_NAME')," + + "'2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + + "PARSE_JSON('{\"event_id\":\"task123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}')," + + "PARSE_JSON('{\"watermark\":\"my_watermark_value\"}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -207,6 +252,40 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() Assertions.assertNull(statsSql.get(ROWS_DELETED)); Assertions.assertNull(statsSql.get(ROWS_TERMINATED)); 
Assertions.assertNull(statsSql.get(ROWS_UPDATED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + + "(\"col_bigint\" VARCHAR,\"col_variant\" VARCHAR,\"legend_persistence_file\" VARCHAR,\"legend_persistence_row_number\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf"; + + String expectedDryRunLoadSQl = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" (\"col_bigint\", \"col_variant\", \"legend_persistence_file\", \"legend_persistence_row_number\") " + + "FROM (SELECT t.$4 as \"col_bigint\",t.$5 as \"col_variant\",METADATA$FILENAME,METADATA$FILE_ROW_NUMBER + 1 FROM my_location as t) " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') FILE_FORMAT = (TYPE = 'AVRO') ON_ERROR = 'ABORT_STATEMENT'"; + + String expectedDryRunNullValidationSql = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"legend_persistence_file\",my_name_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf " + + "WHERE (my_name_validation_lp_yosulf.\"col_bigint\" IS NULL) OR (my_name_validation_lp_yosulf.\"col_variant\" IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"legend_persistence_file\",my_name_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf " + + "WHERE (NOT (my_name_validation_lp_yosulf.\"col_bigint\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_bigint\" AS BIGINT) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT my_name_validation_lp_yosulf.\"col_bigint\",my_name_validation_lp_yosulf.\"col_variant\",my_name_validation_lp_yosulf.\"legend_persistence_file\",my_name_validation_lp_yosulf.\"legend_persistence_row_number\" " + + "FROM \"my_db\".\"my_name_validation_lp_yosulf\" as my_name_validation_lp_yosulf " + + "WHERE (NOT (my_name_validation_lp_yosulf.\"col_variant\" IS NULL)) AND (TRY_CAST(my_name_validation_lp_yosulf.\"col_variant\" AS VARIANT) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"my_db\".\"my_name_validation_lp_yosulf\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSQl, operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); 
} @Test @@ -224,7 +303,7 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() .location("my_location") .fileFormat(UserDefinedFileFormat.of("my_file_format")) .addAllFilePaths(filesList).build()) - .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1NonNullable, col2)).build()) .build(); Dataset mainDataset = DatasetDefinition.builder() @@ -238,6 +317,7 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -247,8 +327,8 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER," + - "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" DATETIME)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"" + + "(\"COL_INT\" INTEGER NOT NULL,\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" DATETIME)"; String expectedIngestSql = "COPY INTO \"MY_DB\".\"MY_NAME\" " + "(\"COL_INT\", \"COL_INTEGER\", \"DIGEST\", \"BATCH_ID\", \"APPEND_TIME\") " + "FROM " + @@ -273,6 +353,42 @@ public void testBulkLoadWithUpperCaseConversionAndNoEventId() Assertions.assertNull(statsSql.get(ROWS_TERMINATED)); Assertions.assertNull(statsSql.get(ROWS_UPDATED)); Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"MY_DB\".\"MY_NAME\" as my_alias WHERE my_alias.\"BATCH_ID\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME')", statsSql.get(ROWS_INSERTED)); + + // Checking dry run + String expectedDryRunPreActionSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\"" + + "(\"COL_INT\" VARCHAR,\"COL_INTEGER\" VARCHAR,\"LEGEND_PERSISTENCE_FILE\" VARCHAR,\"LEGEND_PERSISTENCE_ROW_NUMBER\" BIGINT)"; + + String expectedDryRunDeleteSql = "DELETE FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf"; + + String expectedDryRunLoadSql = "COPY INTO \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" " + + "(\"COL_INT\", \"COL_INTEGER\", \"LEGEND_PERSISTENCE_FILE\", \"LEGEND_PERSISTENCE_ROW_NUMBER\") FROM " + + "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\",METADATA$FILENAME,METADATA$FILE_ROW_NUMBER " + + "FROM my_location as legend_persistence_stage) " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') FILE_FORMAT = (FORMAT_NAME = 'my_file_format') ON_ERROR = 'ABORT_STATEMENT'"; + + String expectedDryRunNullValidationSql = "SELECT MY_NAME_validation_lp_yosulf.\"COL_INT\",MY_NAME_validation_lp_yosulf.\"COL_INTEGER\",MY_NAME_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MY_NAME_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + + "FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf " + + "WHERE MY_NAME_validation_lp_yosulf.\"COL_INT\" IS NULL LIMIT 20"; + + String expectedDryRunDatatypeValidationSql1 = "SELECT 
MY_NAME_validation_lp_yosulf.\"COL_INT\",MY_NAME_validation_lp_yosulf.\"COL_INTEGER\",MY_NAME_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MY_NAME_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + + "FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf " + + "WHERE (NOT (MY_NAME_validation_lp_yosulf.\"COL_INT\" IS NULL)) AND (TRY_CAST(MY_NAME_validation_lp_yosulf.\"COL_INT\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunDatatypeValidationSql2 = "SELECT MY_NAME_validation_lp_yosulf.\"COL_INT\",MY_NAME_validation_lp_yosulf.\"COL_INTEGER\",MY_NAME_validation_lp_yosulf.\"LEGEND_PERSISTENCE_FILE\",MY_NAME_validation_lp_yosulf.\"LEGEND_PERSISTENCE_ROW_NUMBER\" " + + "FROM \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\" as MY_NAME_validation_lp_yosulf " + + "WHERE (NOT (MY_NAME_validation_lp_yosulf.\"COL_INTEGER\" IS NULL)) AND (TRY_CAST(MY_NAME_validation_lp_yosulf.\"COL_INTEGER\" AS INTEGER) IS NULL) LIMIT 20"; + + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"MY_DB\".\"MY_NAME_VALIDATION_LP_YOSULF\""; + + Assertions.assertEquals(expectedDryRunPreActionSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunDeleteSql, operations.dryRunSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSql, operations.dryRunSql().get(1)); + Assertions.assertEquals(expectedDryRunNullValidationSql, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).get(0).getTwo()); + Assertions.assertEquals(1, operations.dryRunValidationSql().get(ValidationCategory.NULL_VALUE).size()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql1, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(0).getTwo()); + Assertions.assertEquals(expectedDryRunDatatypeValidationSql2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).get(1).getTwo()); + Assertions.assertEquals(2, operations.dryRunValidationSql().get(ValidationCategory.TYPE_CONVERSION).size()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); } @Test @@ -338,6 +454,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); @@ -378,6 +495,7 @@ public void testBulkLoadWithDigest() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -439,6 +557,7 @@ public void testBulkLoadWithDigestAndForceOption() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -509,6 +628,7 @@ public void testBulkLoadWithDigestWithFieldsToExcludeAndForceOption() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -573,6 +693,7 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() 
.collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .bulkLoadEventIdValue("task123") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -594,8 +715,21 @@ public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() "FILE_FORMAT = (FIELD_DELIMITER = ',', TYPE = 'CSV') " + "ON_ERROR = 'SKIP_FILE'"; + String expectedDryRunPreActionsSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name_validation_lp_yosulf\"" + + "(\"col_int\" INTEGER,\"col_integer\" INTEGER)"; + String expectedDryRunLoadSql = "COPY INTO \"my_db\".\"my_name_validation_lp_yosulf\" FROM my_location " + + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + + "FILE_FORMAT = (ERROR_ON_COLUMN_COUNT_MISMATCH = false, FIELD_DELIMITER = ',', TYPE = 'CSV') " + + "ON_ERROR = 'SKIP_FILE' " + + "VALIDATION_MODE = 'RETURN_ERRORS'"; + String expectedDryRunPostCleanupSql = "DROP TABLE IF EXISTS \"my_db\".\"my_name_validation_lp_yosulf\""; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals(expectedDryRunPreActionsSql, operations.dryRunPreActionsSql().get(0)); + Assertions.assertEquals(expectedDryRunLoadSql, operations.dryRunSql().get(0)); + Assertions.assertTrue(operations.dryRunValidationSql().isEmpty()); + Assertions.assertEquals(expectedDryRunPostCleanupSql, operations.dryRunPostCleanupSql().get(0)); Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_DELETED)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java index e05ebb63bae..9f4948be9f2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java @@ -77,7 +77,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe List metaIngestSqlList = operations.metadataIngestSql(); String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + - "USING \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "USING \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + "THEN UPDATE SET " + @@ -107,7 +107,7 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + - "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage 
" + + "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + @@ -161,7 +161,7 @@ public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + - "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + @@ -343,7 +343,7 @@ public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(Gen String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + "USING " + - "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "\"mydb\".\"staging_temp_staging_lp_yosulf\" as stage " + "ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND stage.\"version\" > sink.\"version\" " + "THEN UPDATE SET sink.\"id\" = stage.\"id\",sink.\"name\" = stage.\"name\",sink.\"amount\" = stage.\"amount\",sink.\"biz_date\" = stage.\"biz_date\",sink.\"digest\" = stage.\"digest\",sink.\"version\" = stage.\"version\"," + @@ -426,7 +426,7 @@ public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(Generat String mergeSql = "MERGE INTO \"MYDB\".\"MAIN\" as sink " + "USING " + - "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "\"MYDB\".\"STAGING_TEMP_STAGING_LP_YOSULF\" as stage " + "ON (sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\") " + "WHEN MATCHED AND stage.\"VERSION\" >= sink.\"VERSION\" " + "THEN UPDATE SET sink.\"ID\" = stage.\"ID\",sink.\"NAME\" = stage.\"NAME\",sink.\"AMOUNT\" = stage.\"AMOUNT\",sink.\"BIZ_DATE\" = stage.\"BIZ_DATE\",sink.\"DIGEST\" = stage.\"DIGEST\",sink.\"VERSION\" = stage.\"VERSION\"," + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java index 78877a8d46e..a10b0620bc5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java @@ -47,7 +47,7 @@ void testCopyStatementWithFilesAndStandardFileFormat() throws SqlDomException new StagedFilesField(QUOTE_IDENTIFIER, 3, "t", "field3"), new StagedFilesField(QUOTE_IDENTIFIER, 4, "t", "field4") ); - SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null, null); + SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null); Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); List columns = Arrays.asList( @@ -89,7 +89,7 @@ void testCopyStatementWithPatternAndFileFormatAndForceOption() throws SqlDomExce new StagedFilesField(QUOTE_IDENTIFIER, 1, "t", "field4","field4") ); - SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null, null); + SelectStatement selectStatement = new SelectStatement(null, selectItems, Arrays.asList(stagedFiles), null); Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); List columns = Arrays.asList( @@ -119,6 +119,50 @@ void testCopyStatementWithPatternAndFileFormatAndForceOption() throws SqlDomExce assertEquals(expectedStr, sql1); } + + @Test + void testCopyStatementWithStandardDataLoad() throws SqlDomException + { + Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); + StagedFilesTable stagedFiles = new StagedFilesTable("@my_stage"); + + CopyStatement copyStatement = new CopyStatement(); + copyStatement.push(table); + copyStatement.push(stagedFiles); + copyStatement.setFilePaths(Arrays.asList("path1", "path2")); + Map fileFormatOptions = new HashMap<>(); + fileFormatOptions.put("error_on_column_count_mismatch", false); + copyStatement.setFileFormatType(FileFormatType.CSV); + copyStatement.setFileFormatOptions(fileFormatOptions); + + String sql = genSqlIgnoringErrors(copyStatement); + String expectedSql = "COPY INTO \"mydb\".\"mytable1\" FROM @my_stage FILES = ('path1', 'path2') " + + "FILE_FORMAT = (TYPE = 'CSV', error_on_column_count_mismatch = false)"; + assertEquals(expectedSql, sql); + } + + @Test + void testCopyStatementWithStandardDataLoadAndValidate() throws SqlDomException + { + Table table = new Table("mydb", null, "mytable1", "sink", QUOTE_IDENTIFIER); + StagedFilesTable stagedFiles = new StagedFilesTable("@my_stage"); + + CopyStatement copyStatement = new CopyStatement(); + copyStatement.push(table); + copyStatement.push(stagedFiles); + copyStatement.setFilePaths(Arrays.asList("path1", "path2")); + Map fileFormatOptions = new HashMap<>(); + fileFormatOptions.put("error_on_column_count_mismatch", false); + copyStatement.setFileFormatType(FileFormatType.CSV); + copyStatement.setFileFormatOptions(fileFormatOptions); + copyStatement.setValidationMode("RETURN_ERRORS"); + + String sql = genSqlIgnoringErrors(copyStatement); + String expectedSql = "COPY INTO \"mydb\".\"mytable1\" FROM @my_stage FILES = ('path1', 'path2') " + + "FILE_FORMAT = (TYPE = 'CSV', error_on_column_count_mismatch = false) VALIDATION_MODE = 'RETURN_ERRORS'"; + assertEquals(expectedSql, sql); + } + public static String genSqlIgnoringErrors(SqlGen item) { StringBuilder builder = new StringBuilder(); @@ -132,4 +176,4 @@ public static String genSqlIgnoringErrors(SqlGen item) } return builder.toString(); } -} +} 
\ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/SnowflakeRejectedRecordParserTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/SnowflakeRejectedRecordParserTest.java new file mode 100644 index 00000000000..343ce4e9f60 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/SnowflakeRejectedRecordParserTest.java @@ -0,0 +1,129 @@ +// Copyright 2024 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormatType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +public class SnowflakeRejectedRecordParserTest +{ + private static Field col1 = Field.builder() + .name("col_int") + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .build(); + private static Field col2 = Field.builder() + .name("col_string") + .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) + .build(); + private static Field col3 = Field.builder() + .name("col_decimal") + .type(FieldType.of(DataType.DECIMAL, Optional.empty(), Optional.empty())) + .columnNumber(4) + .build(); + private static Field col4 = Field.builder() + .name("col_timestamp") + .type(FieldType.of(DataType.TIMESTAMP, Optional.empty(), Optional.empty())) + .columnNumber(5) + .build(); + + private List filesList = 
Arrays.asList("/path/xyz/file1.csv", "/path/xyz/file2.csv"); + + @Test + public void testSnowflakeRejectedRecordParserDefaultOptions() throws IOException + { + SnowflakeSink sink = (SnowflakeSink) SnowflakeSink.get(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + SnowflakeStagedFilesDatasetProperties.builder() + .location("my_location") + .fileFormat(StandardFileFormat.builder() + .formatType(FileFormatType.CSV) + .build()) + .addAllFilePatterns(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + String commaSeparatedCsvLine = "1,Andy,1.23,2022-01-12 00:00:00.0"; + String expectedJsonString = "{" + + "\"col_timestamp\":\"2022-01-12 00:00:00.0\"," + + "\"col_int\":\"1\"," + + "\"col_decimal\":\"1.23\"," + + "\"col_string\":\"Andy\"}"; + String actualJsonString = sink.parseSnowflakeRejectedRecord(datasets, commaSeparatedCsvLine); + Assertions.assertEquals(expectedJsonString, actualJsonString); + } + + @Test + public void testSnowflakeRejectedRecordParserWithFileFormatOptions() throws IOException + { + SnowflakeSink sink = (SnowflakeSink) SnowflakeSink.get(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + SnowflakeStagedFilesDatasetProperties.builder() + .location("my_location") + .fileFormat(StandardFileFormat.builder() + .formatType(FileFormatType.CSV) + .putFormatOptions("FIELD_DELIMITER", ",") + .putFormatOptions("FIELD_OPTIONALLY_ENCLOSED_BY", '"') + .putFormatOptions("ESCAPE", "\\") + .build()) + .addAllFilePatterns(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + String commaSeparatedCsvLine = "1,\"Andy, Soo\",1.23,\"2022-01-12 00:00:00.0\""; + + String expectedJsonString = "{" + + "\"col_timestamp\":\"2022-01-12 00:00:00.0\"," + + "\"col_int\":\"1\"," + + "\"col_decimal\":\"1.23\"," + + "\"col_string\":\"Andy, Soo\"}"; + String actualJsonString = sink.parseSnowflakeRejectedRecord(datasets, commaSeparatedCsvLine); + Assertions.assertEquals(expectedJsonString, actualJsonString); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index b1aea5f0e4a..fb5e9b2d025 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -64,6 +64,7 @@ public 
class BaseTest protected String mainDbName = "mydb"; protected String mainTableName = "main"; protected String mainTableAlias = "sink"; + protected String ingestRunId = "075605e3-bada-47d7-9ae9-7138f392fe22"; protected String stagingDbName = "mydb"; protected String stagingTableName = "staging"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java index 2ed71270233..27c2dce46ed 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java @@ -81,6 +81,7 @@ void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords(operations, dataSplitRangesOneToTwo); @@ -98,6 +99,7 @@ void testAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRec .cleanupStagingData(true) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult queries = generator.generateOperations(scenario.getDatasets()); @@ -135,6 +137,7 @@ public void testAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExisti .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); @@ -152,6 +155,7 @@ void testAppendOnlyWithUpperCaseOptimizer() .relationalSink(getRelationalSink()) .caseConversion(CaseConversion.TO_UPPER) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); @@ -171,6 +175,7 @@ void testAppendOnlyWithLessColumnsInStaging() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -188,6 +193,7 @@ void testAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecor .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); @@ -205,6 +211,7 @@ void 
testAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords() .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); @@ -276,6 +283,7 @@ public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisting .relationalSink(getRelationalSink()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java index 8b90ae0de7b..5bb9569ab37 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java @@ -82,6 +82,7 @@ void testNontemporalDeltaWithAuditingFilterDupsNoVersioning() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); @@ -99,6 +100,7 @@ void testNonTemporalDeltaNoAuditingNoDedupAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(testScenario.getDatasets(), dataSplitRangesOneToTwo); @@ -133,6 +135,7 @@ void testNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(testScenario.getDatasets(), dataSplitRangesOneToTwo); @@ -283,6 +286,7 @@ void testNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters() .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(false) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); @@ -336,6 +340,8 @@ void testNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) + .sampleRowCount(10) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java index d88af9bd1b4..e657d5abaa3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java @@ -70,6 +70,7 @@ void testNontemporalSnapshotWithAuditingFilterDupsNoVersioning() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); @@ -87,6 +88,7 @@ void testNontemporalSnapshotWithAuditingFailOnDupMaxVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java index c7a41bb0978..8425f1b8523 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java @@ -82,6 +82,7 @@ void testUnitemporalDeltaWithDeleteIndFilterDupsNoVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(operations); @@ -98,6 +99,7 @@ void testUnitemporalDeltaWithDeleteIndNoDedupAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); 
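// Editor's note: the fixed ingestRunId pinned in BaseTest is what makes the generated
// temp-table names (the "_lp_yosulf" suffix asserted throughout these tests) stable across
// runs, so expected SQL can be compared verbatim. Purely as an illustration of the idea --
// not necessarily the library's actual derivation -- a deterministic alphabetic token can
// be produced from the run id like so:
//
//   int h = Math.abs("075605e3-bada-47d7-9ae9-7138f392fe22".hashCode());
//   StringBuilder token = new StringBuilder("lp_");
//   for (int i = 0; i < 6; i++) { token.append((char) ('a' + h % 26)); h /= 26; }
//   // same run id in => same token out; a random UUID per run would break these assertions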
verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(operations, dataSplitRangesOneToTwo); @@ -305,6 +307,7 @@ void testUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(false) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(operations); @@ -322,6 +325,7 @@ void testUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(false) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(operations); @@ -358,6 +362,7 @@ void testUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform() .executionTimestampClock(fixedClock_2000_01_01) .cleanupStagingData(true) .batchSuccessStatusValue("SUCCEEDED") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(operations); @@ -377,6 +382,8 @@ void testUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilte .cleanupStagingData(true) .caseConversion(CaseConversion.TO_UPPER) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) + .sampleRowCount(10) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java index 47aa50fbc7b..0cb41f6eb10 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java @@ -62,6 +62,7 @@ void testUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(operations, dataSplitRangesOneToTwo); @@ -110,6 +111,7 @@ void testUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion() .relationalSink(getRelationalSink()) 
.executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(operations, dataSplitRangesOneToTwo); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java index cf3d7e63ca1..7478a2257de 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java @@ -60,6 +60,7 @@ void testUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(operations, dataSplitRangesOneToTwo); @@ -92,6 +93,7 @@ void testUnitemporalDeltaWithDeleteIndFilterDupsAllVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(operations, dataSplitRangesOneToTwo); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java index 29617a200a8..c5b3e4c4551 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java @@ -67,6 +67,7 @@ void testUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion() .collectStatistics(true) 
.createStagingDataset(true) .enableConcurrentSafety(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java index 1d44bb7b696..2658756a05a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java @@ -63,6 +63,7 @@ void testUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion() .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(operations); @@ -98,6 +99,7 @@ void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxV .caseConversion(CaseConversion.TO_UPPER) .putAllAdditionalMetadata(Collections.singletonMap("watermark", "my_watermark_value")) .batchSuccessStatusValue("SUCCEEDED") + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java index 13bc1fca81d..68f94126a17 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java @@ -60,6 +60,7 @@ void testUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion() .relationalSink(getRelationalSink()) 
.executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) + .ingestRunId(ingestRunId) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(operations);
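Editor's note: for readers following the new SnowflakeRejectedRecordParserTest above, the behaviour under test is essentially "split one rejected CSV line positionally and re-key it by the staged schema's column names, emitting JSON". The sketch below illustrates that idea under the test's assumptions only (comma delimiter, optional double-quote enclosing, Jackson available per the updated pom); it is not the SnowflakeSink implementation, the class and method names are hypothetical, and key ordering in the real output may differ:

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public final class RejectedRecordSketch
{
    // Pairs positional CSV values with column names and serializes the row as JSON.
    public static String toJson(List<String> columnNames, String csvLine) throws Exception
    {
        List<String> values = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;
        for (char c : csvLine.toCharArray())
        {
            if (c == '"')
            {
                inQuotes = !inQuotes;           // toggle quoted mode; escaped quotes not handled
            }
            else if (c == ',' && !inQuotes)
            {
                values.add(current.toString()); // field boundary outside quotes
                current.setLength(0);
            }
            else
            {
                current.append(c);
            }
        }
        values.add(current.toString());

        Map<String, String> row = new LinkedHashMap<>();
        for (int i = 0; i < columnNames.size() && i < values.size(); i++)
        {
            row.put(columnNames.get(i), values.get(i)); // re-key by schema position
        }
        return new ObjectMapper().writeValueAsString(row);
    }
}

For example, toJson(Arrays.asList("col_int", "col_string", "col_decimal", "col_timestamp"), "1,\"Andy, Soo\",1.23,\"2022-01-12 00:00:00.0\"") yields a JSON object mapping each column name to its quoted-stripped value, with the embedded comma in "Andy, Soo" preserved, mirroring the second test case above.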