diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java new file mode 100644 index 00000000000..60c193d7938 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DedupAndVersionErrorStatistics.java @@ -0,0 +1,21 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.common; + +public enum DedupAndVersionErrorStatistics +{ + MAX_DUPLICATES, + MAX_DATA_ERRORS; +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java index 11958ce425e..bfc0302b75e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java @@ -15,15 +15,10 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.ingestmode.audit.Auditing; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategyVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicatesAbstract; +import org.immutables.value.Value; import java.util.Optional; -import static org.immutables.value.Value.Check; import static org.immutables.value.Value.Immutable; import static org.immutables.value.Value.Style; 
@@ -39,43 +34,12 @@ public interface AppendOnlyAbstract extends IngestMode { Optional digestField(); - Optional dataSplitField(); - Auditing auditing(); - DeduplicationStrategy deduplicationStrategy(); - - @Check - default void validate() + @Value.Default + default boolean filterExistingRecords() { - deduplicationStrategy().accept(new DeduplicationStrategyVisitor() - { - @Override - public Void visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) - { - return null; - } - - @Override - public Void visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates) - { - if (!digestField().isPresent()) - { - throw new IllegalStateException("Cannot build AppendOnly, [digestField] must be specified since [deduplicationStrategy] is set to filter duplicates"); - } - return null; - } - - @Override - public Void visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) - { - if (dataSplitField().isPresent()) - { - throw new IllegalStateException("Cannot build AppendOnly, DataSplits not supported for failOnDuplicates mode"); - } - return null; - } - }); + return false; } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaAbstract.java index ac72586ff14..48d793c3792 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaAbstract.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaAbstract.java @@ -14,14 +14,19 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicatesAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategyVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicatesAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicatesAbstract; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.NoDeletesMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoning; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidityMilestoning; - -import java.util.Optional; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategyVisitor; +import org.immutables.value.Value; import static org.immutables.value.Value.Default; import static org.immutables.value.Value.Immutable; @@ -37,9 +42,13 @@ ) public interface BitemporalDeltaAbstract extends 
IngestMode, BitemporalMilestoned { - String digestField(); + @Value.Default + default boolean filterExistingRecords() + { + return false; + } - Optional dataSplitField(); + String digestField(); @Override TransactionMilestoning transactionMilestoning(); @@ -53,15 +62,59 @@ default MergeStrategy mergeStrategy() return NoDeletesMergeStrategy.builder().build(); } - @Default - default DeduplicationStrategy deduplicationStrategy() - { - return AllowDuplicates.builder().build(); - } - @Override default T accept(IngestModeVisitor visitor) { return visitor.visitBitemporalDelta(this); } + + @Value.Check + default void validate() + { + versioningStrategy().accept(new VersioningStrategyVisitor() + { + @Override + public Void visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) + { + return null; + } + + @Override + public Void visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + { + throw new IllegalStateException("Cannot build BitemporalDelta, max version is not supported"); + } + + @Override + public Void visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract) + { + if (allVersionsStrategyAbstract.performStageVersioning()) + { + throw new IllegalStateException("Cannot build BitemporalDelta, perform versioning not allowed"); + } + return null; + } + }); + + deduplicationStrategy().accept(new DeduplicationStrategyVisitor() + { + @Override + public Void visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) + { + return null; + } + + @Override + public Void visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates) + { + throw new IllegalStateException("Cannot build BitemporalDelta, filter duplicates is not supported"); + } + + @Override + public Void visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) + { + throw new IllegalStateException("Cannot build BitemporalDelta, fail on duplicates is not supported"); + } + }); + } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java index 800ca9c7a6c..5099b17e229 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java @@ -15,7 +15,15 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.ingestmode.audit.Auditing; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicatesAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategyVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicatesAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicatesAbstract; import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategyAbstract; +import 
org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategyVisitor; import org.immutables.value.Value; @Value.Immutable @@ -39,4 +47,50 @@ default T accept(IngestModeVisitor visitor) { return visitor.visitBulkLoad(this); } + + @Value.Check + default void validate() + { + deduplicationStrategy().accept(new DeduplicationStrategyVisitor() + { + @Override + public Void visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) + { + return null; + } + + @Override + public Void visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates) + { + throw new IllegalStateException("Cannot build BulkLoad, filter duplicates is not supported"); + } + + @Override + public Void visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) + { + throw new IllegalStateException("Cannot build BulkLoad, fail on duplicates is not supported"); + } + }); + + versioningStrategy().accept(new VersioningStrategyVisitor() + { + @Override + public Void visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) + { + return null; + } + + @Override + public Void visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + { + throw new IllegalStateException("Cannot build BulkLoad, max version is not supported"); + } + + @Override + public Void visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract) + { + throw new IllegalStateException("Cannot build BulkLoad, all version is not supported"); + } + }); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index 
b92a06436fc..b6e12b71adf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -17,10 +17,6 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitor; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditingAbstract; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditingAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategyVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicatesAbstract; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitor; import org.finos.legend.engine.persistence.components.ingestmode.merge.NoDeletesMergeStrategyAbstract; @@ -72,13 +68,13 @@ public DeriveMainDatasetSchemaFromStaging(Dataset mainDataset, Dataset stagingDa @Override public Dataset visitAppendOnly(AppendOnlyAbstract appendOnly) { + boolean isAuditingFieldPK = doesDatasetContainsAnyPK(mainSchemaFields); + appendOnly.auditing().accept(new EnrichSchemaWithAuditing(mainSchemaFields, isAuditingFieldPK)); if (appendOnly.digestField().isPresent()) { addDigestField(mainSchemaFields, appendOnly.digestField().get()); } 
removeDataSplitField(appendOnly.dataSplitField()); - boolean isAuditingFieldPK = appendOnly.deduplicationStrategy().accept(new DeriveAuditingFieldPKForAppendOnly(appendOnly.dataSplitField().isPresent())); - appendOnly.auditing().accept(new EnrichSchemaWithAuditing(mainSchemaFields, isAuditingFieldPK)); return mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.addAllFields(mainSchemaFields).build()).build(); } @@ -180,37 +176,9 @@ public static void addDigestField(List schemaFields, String digestFieldNa private boolean doesDatasetContainsAnyPK(List mainSchemaFields) { - return mainSchemaFields.stream().anyMatch(field -> field.primaryKey()); + return mainSchemaFields.stream().anyMatch(Field::primaryKey); } - public static class DeriveAuditingFieldPKForAppendOnly implements DeduplicationStrategyVisitor - { - - private boolean isDataSplitEnabled; - - public DeriveAuditingFieldPKForAppendOnly(boolean isDataSplitEnabled) - { - this.isDataSplitEnabled = isDataSplitEnabled; - } - - @Override - public Boolean visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) - { - return isDataSplitEnabled; - } - - @Override - public Boolean visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates) - { - return true; - } - - @Override - public Boolean visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) - { - return false; - } - } public static class EnrichSchemaWithMergeStrategy implements MergeStrategyVisitor { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestMode.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestMode.java index c6e239e3522..78c91752437 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestMode.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestMode.java @@ -14,7 +14,34 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningVisitors; +import org.immutables.value.Value; + +import java.util.Optional; + public interface IngestMode { + @Value.Derived + default Optional dataSplitField() + { + return this.versioningStrategy().accept(VersioningVisitors.EXTRACT_DATA_SPLIT_FIELD); + } + + @Value.Default + default DeduplicationStrategy deduplicationStrategy() + { + return AllowDuplicates.builder().build(); + } + + @Value.Default + default VersioningStrategy versioningStrategy() + { + return NoVersioningStrategy.builder().build(); + } + T accept(IngestModeVisitor visitor); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java index fcdca2edae4..220b55532bb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java @@ -20,11 +20,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditingAbstract; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningStrategyVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategyAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.NoVersioningStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategy; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; @@ -53,6 +49,8 @@ import 
org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.ValidityDerivation; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.ValidityDerivationVisitor; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromDateTimeAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import java.util.Optional; import java.util.List; @@ -77,10 +75,11 @@ public IngestMode visitAppendOnly(AppendOnlyAbstract appendOnly) { return AppendOnly .builder() - .dataSplitField(applyCase(appendOnly.dataSplitField())) .digestField(applyCase(appendOnly.digestField())) .auditing(appendOnly.auditing().accept(new AuditingCaseConverter())) .deduplicationStrategy(appendOnly.deduplicationStrategy()) + .versioningStrategy(appendOnly.versioningStrategy().accept(new VersionStrategyCaseConverter())) + .filterExistingRecords(appendOnly.filterExistingRecords()) .build(); } @@ -89,8 +88,9 @@ public IngestMode visitNontemporalSnapshot(NontemporalSnapshotAbstract nontempor { return NontemporalSnapshot .builder() - .dataSplitField(applyCase(nontemporalSnapshot.dataSplitField())) .auditing(nontemporalSnapshot.auditing().accept(new AuditingCaseConverter())) + .deduplicationStrategy(nontemporalSnapshot.deduplicationStrategy()) + .versioningStrategy(nontemporalSnapshot.versioningStrategy().accept(new VersionStrategyCaseConverter())) .build(); } @@ -100,9 +100,9 @@ public IngestMode visitNontemporalDelta(NontemporalDeltaAbstract nontemporalDelt return NontemporalDelta .builder() .digestField(applyCase(nontemporalDelta.digestField())) - .dataSplitField(applyCase(nontemporalDelta.dataSplitField())) .mergeStrategy(nontemporalDelta.mergeStrategy().accept(new MergeStrategyCaseConverter())) 
.auditing(nontemporalDelta.auditing().accept(new AuditingCaseConverter())) + .deduplicationStrategy(nontemporalDelta.deduplicationStrategy()) .versioningStrategy(nontemporalDelta.versioningStrategy().accept(new VersionStrategyCaseConverter())) .build(); } @@ -117,6 +117,8 @@ public IngestMode visitUnitemporalSnapshot(UnitemporalSnapshotAbstract unitempor .addAllPartitionFields(applyCase(unitemporalSnapshot.partitionFields())) .putAllPartitionValuesByField(applyCase(unitemporalSnapshot.partitionValuesByField())) .emptyDatasetHandling(unitemporalSnapshot.emptyDatasetHandling()) + .deduplicationStrategy(unitemporalSnapshot.deduplicationStrategy()) + .versioningStrategy(unitemporalSnapshot.versioningStrategy().accept(new VersionStrategyCaseConverter())) .build(); } @@ -126,10 +128,10 @@ public IngestMode visitUnitemporalDelta(UnitemporalDeltaAbstract unitemporalDelt return UnitemporalDelta .builder() .digestField(applyCase(unitemporalDelta.digestField())) - .dataSplitField(applyCase(unitemporalDelta.dataSplitField())) .addAllOptimizationFilters(unitemporalDelta.optimizationFilters().stream().map(filter -> applyCase(filter)).collect(Collectors.toList())) .transactionMilestoning(unitemporalDelta.transactionMilestoning().accept(new TransactionMilestoningCaseConverter())) .mergeStrategy(unitemporalDelta.mergeStrategy().accept(new MergeStrategyCaseConverter())) + .deduplicationStrategy(unitemporalDelta.deduplicationStrategy()) .versioningStrategy(unitemporalDelta.versioningStrategy().accept(new VersionStrategyCaseConverter())) .build(); } @@ -144,6 +146,8 @@ public IngestMode visitBitemporalSnapshot(BitemporalSnapshotAbstract bitemporalS .validityMilestoning(bitemporalSnapshot.validityMilestoning().accept(new ValidityMilestoningCaseConverter())) .addAllPartitionFields(applyCase(bitemporalSnapshot.partitionFields())) .putAllPartitionValuesByField(applyCase(bitemporalSnapshot.partitionValuesByField())) + .deduplicationStrategy(bitemporalSnapshot.deduplicationStrategy()) + 
.versioningStrategy(bitemporalSnapshot.versioningStrategy().accept(new VersionStrategyCaseConverter())) .build(); } @@ -153,11 +157,12 @@ public IngestMode visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) return BitemporalDelta .builder() .digestField(applyCase(bitemporalDelta.digestField())) - .dataSplitField(applyCase(bitemporalDelta.dataSplitField())) .transactionMilestoning(bitemporalDelta.transactionMilestoning().accept(new TransactionMilestoningCaseConverter())) .validityMilestoning(bitemporalDelta.validityMilestoning().accept(new ValidityMilestoningCaseConverter())) .deduplicationStrategy(bitemporalDelta.deduplicationStrategy()) .mergeStrategy(bitemporalDelta.mergeStrategy().accept(new MergeStrategyCaseConverter())) + .versioningStrategy(bitemporalDelta.versioningStrategy().accept(new VersionStrategyCaseConverter())) + .filterExistingRecords(bitemporalDelta.filterExistingRecords()) .build(); } @@ -168,6 +173,8 @@ public IngestMode visitBulkLoad(BulkLoadAbstract bulkLoad) .batchIdField(applyCase(bulkLoad.batchIdField())) .digestGenStrategy(bulkLoad.digestGenStrategy().accept(new DigestGenStrategyCaseConverter())) .auditing(bulkLoad.auditing().accept(new AuditingCaseConverter())) + .deduplicationStrategy(bulkLoad.deduplicationStrategy()) + .versioningStrategy(bulkLoad.versioningStrategy().accept(new VersionStrategyCaseConverter())) .build(); } @@ -341,11 +348,23 @@ public VersioningStrategy visitMaxVersionStrategy(MaxVersionStrategyAbstract max { return MaxVersionStrategy .builder() - .versioningComparator(maxVersionStrategy.versioningComparator()) + .mergeDataVersionResolver(maxVersionStrategy.mergeDataVersionResolver()) .versioningField(strategy.apply(maxVersionStrategy.versioningField())) - .performDeduplication(maxVersionStrategy.performDeduplication()) + .performStageVersioning(maxVersionStrategy.performStageVersioning()) .build(); } - } + @Override + public VersioningStrategy visitAllVersionsStrategy(AllVersionsStrategyAbstract 
allVersionsStrategyAbstract) + { + return AllVersionsStrategy + .builder() + .mergeDataVersionResolver(allVersionsStrategyAbstract.mergeDataVersionResolver()) + .versioningField(strategy.apply(allVersionsStrategyAbstract.versioningField())) + .versioningOrder(allVersionsStrategyAbstract.versioningOrder()) + .dataSplitFieldName(strategy.apply(allVersionsStrategyAbstract.dataSplitFieldName())) + .performStageVersioning(allVersionsStrategyAbstract.performStageVersioning()) + .build(); + } + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeOptimizationColumnHandler.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeOptimizationColumnHandler.java index b24cabc68ab..381980a1c5c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeOptimizationColumnHandler.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeOptimizationColumnHandler.java @@ -64,11 +64,11 @@ public IngestMode visitUnitemporalDelta(UnitemporalDeltaAbstract unitemporalDelt return UnitemporalDelta .builder() .digestField(unitemporalDelta.digestField()) - .dataSplitField(unitemporalDelta.dataSplitField()) .addAllOptimizationFilters(deriveOptimizationFilters(unitemporalDelta)) .transactionMilestoning(unitemporalDelta.transactionMilestoning()) .mergeStrategy(unitemporalDelta.mergeStrategy()) .versioningStrategy(unitemporalDelta.versioningStrategy()) + 
.deduplicationStrategy(unitemporalDelta.deduplicationStrategy()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java index 3cb54406d79..0a255978ace 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java @@ -15,10 +15,6 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.common.OptimizationFilter; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategyVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicatesAbstract; import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategyVisitor; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategyAbstract; @@ -41,7 +37,7 @@ private IngestModeVisitors() @Override public Boolean 
visitAppendOnly(AppendOnlyAbstract appendOnly) { - return appendOnly.deduplicationStrategy().accept(DEDUPLICATION_STRATEGY_DIGEST_REQUIRED); + return appendOnly.filterExistingRecords(); } @Override @@ -158,7 +154,12 @@ public Set visitNontemporalSnapshot(NontemporalSnapshotAbstract nontempo @Override public Set visitNontemporalDelta(NontemporalDeltaAbstract nontemporalDelta) { - return Collections.singleton(nontemporalDelta.digestField()); + Set metaFields = new HashSet<>(); + + metaFields.add(nontemporalDelta.digestField()); + nontemporalDelta.dataSplitField().ifPresent(metaFields::add); + + return metaFields; } @Override @@ -360,27 +361,6 @@ public List visitBulkLoad(BulkLoadAbstract bulkLoad) } }; - private static final DeduplicationStrategyVisitor DEDUPLICATION_STRATEGY_DIGEST_REQUIRED = new DeduplicationStrategyVisitor() - { - @Override - public Boolean visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) - { - return false; - } - - @Override - public Boolean visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates) - { - return true; - } - - @Override - public Boolean visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) - { - return false; - } - }; - private static final DigestGenStrategyVisitor DIGEST_GEN_STRATEGY_DIGEST_REQUIRED = new DigestGenStrategyVisitor() { @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaAbstract.java index 7e5953f6ad1..962a2e3258f 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaAbstract.java @@ -15,12 +15,13 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.ingestmode.audit.Auditing; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.NoVersioningStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.NoDeletesMergeStrategy; - -import java.util.Optional; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategyVisitor; +import org.immutables.value.Value; import static org.immutables.value.Value.Default; import static org.immutables.value.Value.Immutable; @@ -40,14 +41,6 @@ public interface NontemporalDeltaAbstract extends IngestMode Auditing auditing(); - Optional dataSplitField(); - - @Default - default VersioningStrategy versioningStrategy() - { - return NoVersioningStrategy.builder().build(); - } - @Default default MergeStrategy mergeStrategy() { @@ -59,4 +52,38 @@ default T accept(IngestModeVisitor visitor) { return 
visitor.visitNontemporalDelta(this);
     }
+
+    @Value.Check
+    default void validate()
+    {
+        versioningStrategy().accept(new VersioningStrategyVisitor<Void>()
+        {
+            // Build-time check: MaxVersion and AllVersions strategies must carry a mergeDataVersionResolver.
+            @Override
+            public Void visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+            {
+                return null;
+            }
+
+            @Override
+            public Void visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+            {
+                if (!maxVersionStrategy.mergeDataVersionResolver().isPresent())
+                {
+                    throw new IllegalStateException("Cannot build NontemporalDelta, VersioningResolver is mandatory for MaxVersionStrategy");
+                }
+                return null;
+            }
+
+            @Override
+            public Void visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+            {
+                if (!allVersionsStrategyAbstract.mergeDataVersionResolver().isPresent())
+                {
+                    throw new IllegalStateException("Cannot build NontemporalDelta, VersioningResolver is mandatory for AllVersionsStrategy");
+                }
+                return null;
+            }
+        });
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotAbstract.java
index 4b01c4c9c52..7b3984bd871 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotAbstract.java
@@ -15,8 +15,11 @@ package
org.finos.legend.engine.persistence.components.ingestmode;
 
 import org.finos.legend.engine.persistence.components.ingestmode.audit.Auditing;
-
-import java.util.Optional;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategyAbstract;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategyAbstract;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategyVisitor;
+import org.immutables.value.Value;
 
 import static org.immutables.value.Value.Immutable;
 import static org.immutables.value.Value.Style;
@@ -33,11 +36,35 @@ public interface NontemporalSnapshotAbstract extends IngestMode
 {
     Auditing auditing();
 
-    Optional dataSplitField();
-
     @Override
     default <T> T accept(IngestModeVisitor<T> visitor)
     {
         return visitor.visitNontemporalSnapshot(this);
     }
+
+    @Value.Check
+    default void validate()
+    {
+        // Allowed versioning strategies: NoVersioning and MaxVersion; AllVersions is rejected below
+        this.versioningStrategy().accept(new VersioningStrategyVisitor<Void>()
+        {
+            @Override
+            public Void visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+            {
+                return null;
+            }
+
+            @Override
+            public Void visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+            {
+                return null;
+            }
+
+            @Override
+            public Void visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+            {
+                throw new IllegalStateException("Cannot build NontemporalSnapshot, AllVersionsStrategy not supported");
+            }
+        });
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaAbstract.java
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaAbstract.java index 1a6cbc3cb29..c363a0dbcc7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaAbstract.java @@ -15,12 +15,15 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.common.OptimizationFilter; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.NoVersioningStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.NoDeletesMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoned; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoning; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategyVisitor; +import org.immutables.value.Value; import java.util.List; import 
java.util.Optional;
@@ -41,19 +44,11 @@ public interface UnitemporalDeltaAbstract extends IngestMode, TransactionMilesto
 {
     String digestField();
 
-    Optional dataSplitField();
-
     List optimizationFilters();
 
     @Override
     TransactionMilestoning transactionMilestoning();
 
-    @Default
-    default VersioningStrategy versioningStrategy()
-    {
-        return NoVersioningStrategy.builder().build();
-    }
-
     @Default
     default MergeStrategy mergeStrategy()
     {
@@ -65,4 +60,39 @@ default <T> T accept(IngestModeVisitor<T> visitor)
     {
         return visitor.visitUnitemporalDelta(this);
     }
-}
+
+    @Value.Check
+    default void validate()
+    {
+        versioningStrategy().accept(new VersioningStrategyVisitor<Void>()
+        {
+            // Build-time check: MaxVersion and AllVersions strategies must carry a mergeDataVersionResolver.
+            @Override
+            public Void visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+            {
+                return null;
+            }
+
+            @Override
+            public Void visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+            {
+                if (!maxVersionStrategy.mergeDataVersionResolver().isPresent())
+                {
+                    throw new IllegalStateException("Cannot build UnitemporalDelta, MergeDataVersionResolver is mandatory for MaxVersionStrategy");
+                }
+                return null;
+            }
+
+            @Override
+            public Void visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+            {
+                if (!allVersionsStrategyAbstract.mergeDataVersionResolver().isPresent())
+                {
+                    throw new IllegalStateException("Cannot build UnitemporalDelta, MergeDataVersionResolver is mandatory for AllVersionsStrategy");
+                }
+                return null;
+            }
+        });
+    }
+
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java
index
0ed6847395f..3ad65815215 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java
@@ -18,10 +18,17 @@
 import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.EmptyDatasetHandling;
 import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoned;
 import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoning;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategyVisitor;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategyAbstract;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategyAbstract;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.MergeDataVersionResolver;
+import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolverAbstract;
 import org.immutables.value.Value;
 
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 
 import static org.immutables.value.Value.Derived;
@@ -83,5 +90,37 @@ default void validate()
                 }
             }
         }
+
+        // Allowed versioning strategies: NoVersioning, or MaxVersion with a digest-based resolver; AllVersions is rejected
+        this.versioningStrategy().accept(new VersioningStrategyVisitor<Void>()
+        {
+            @Override
+            public Void visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+            {
+                return null;
+            }
+
+            @Override
+            public Void visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+            {
+                Optional<MergeDataVersionResolver> versionResolver = maxVersionStrategy.mergeDataVersionResolver();
+                if (!versionResolver.isPresent())
+                {
+                    throw new IllegalStateException("Cannot build UnitemporalSnapshot, MergeDataVersionResolver is mandatory for MaxVersionStrategy");
+                }
+                if (!(versionResolver.orElseThrow(IllegalStateException::new) instanceof DigestBasedResolverAbstract))
+                {
+                    throw new IllegalStateException("Cannot build UnitemporalSnapshot, Only DIGEST_BASED VersioningResolver allowed for this ingest mode");
+                }
+                return null;
+            }
+
+            @Override
+            public Void visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+            {
+                throw new IllegalStateException("Cannot build UnitemporalSnapshot, AllVersionsStrategy not supported");
+            }
+        });
+
+    }
 }
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DatasetDeduplicationHandler.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DatasetDeduplicationHandler.java
new file mode 100644
index 00000000000..8f2217604d0
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DatasetDeduplicationHandler.java
@@ -0,0 +1,69 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.deduplication;
+
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection;
+import org.finos.legend.engine.persistence.components.logicalplan.values.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DatasetDeduplicationHandler implements DeduplicationStrategyVisitor<Dataset>
+{
+    // Alias of the count column added by the FilterDuplicates / FailOnDuplicates selections.
+    public static final String COUNT = "legend_persistence_count";
+
+    Dataset stagingDataset;
+
+    public DatasetDeduplicationHandler(Dataset stagingDataset)
+    {
+        this.stagingDataset = stagingDataset;
+    }
+
+    @Override
+    public Dataset visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates)
+    {
+        return stagingDataset;
+    }
+
+    @Override
+    public Dataset visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates)
+    {
+        return selectionWithGroupByAllColumns();
+    }
+
+    @Override
+    public Dataset visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates)
+    {
+        return selectionWithGroupByAllColumns();
+    }
+
+    private Dataset selectionWithGroupByAllColumns()
+    {
+        List<Value> allColumns = new ArrayList<>(stagingDataset.schemaReference().fieldValues());
+        List<Value> allColumnsWithCount = new ArrayList<>(stagingDataset.schemaReference().fieldValues());
+        // Select every staging column plus a COUNT over All.INSTANCE, grouped by every staging column.
+        Value count = FunctionImpl.builder().functionName(FunctionName.COUNT).addValue(All.INSTANCE).alias(COUNT).build();
+        allColumnsWithCount.add(count);
+        Selection selectionWithGroupByAllColumns = Selection.builder()
+            .source(stagingDataset)
+            .addAllFields(allColumnsWithCount)
+            .groupByFields(allColumns)
+            .build();
+        return selectionWithGroupByAllColumns;
+    }
+
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DatasetDeduplicator.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DatasetDeduplicator.java
deleted file mode 100644
index 275515bb448..00000000000
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DatasetDeduplicator.java
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2023 Goldman Sachs
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -package org.finos.legend.engine.persistence.components.ingestmode.deduplication; - -import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; -import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; -import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; -import org.finos.legend.engine.persistence.components.logicalplan.values.ObjectValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.Order; -import org.finos.legend.engine.persistence.components.logicalplan.values.OrderedField; -import org.finos.legend.engine.persistence.components.logicalplan.values.Value; -import org.finos.legend.engine.persistence.components.logicalplan.values.WindowFunction; - -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -public class DatasetDeduplicator implements VersioningStrategyVisitor -{ - - Dataset stagingDataset; - List primaryKeys; - - private static final String ROW_NUMBER = "legend_persistence_row_num"; - - public DatasetDeduplicator(Dataset stagingDataset, List primaryKeys) - { - this.stagingDataset = stagingDataset; - this.primaryKeys = primaryKeys; - } - - @Override - public Dataset visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) - { - return this.stagingDataset; - } - - @Override - public Dataset visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) - { - Dataset enrichedStagingDataset = this.stagingDataset; - if (maxVersionStrategy.performDeduplication()) - { - OrderedField orderByField = OrderedField.builder() - 
.fieldName(maxVersionStrategy.versioningField()) - .datasetRef(stagingDataset.datasetReference()) - .order(Order.DESC).build(); - List allColumns = new ArrayList<>(stagingDataset.schemaReference().fieldValues()); - List allColumnsWithRowNumber = new ArrayList<>(stagingDataset.schemaReference().fieldValues()); - List partitionFields = primaryKeys.stream() - .map(field -> FieldValue.builder().fieldName(field).datasetRef(stagingDataset.datasetReference()).build()) - .collect(Collectors.toList()); - Value rowNumber = WindowFunction.builder() - .windowFunction(FunctionImpl.builder().functionName(FunctionName.ROW_NUMBER).build()) - .addAllPartitionByFields(partitionFields) - .addOrderByFields(orderByField) - .alias(ROW_NUMBER) - .build(); - allColumnsWithRowNumber.add(rowNumber); - Selection selectionWithRowNumber = Selection.builder() - .source(stagingDataset) - .addAllFields(allColumnsWithRowNumber) - .alias(stagingDataset.datasetReference().alias()) - .build(); - - Condition rowNumberFilterCondition = Equals.of(FieldValue.builder().fieldName(ROW_NUMBER).datasetRefAlias(stagingDataset.datasetReference().alias()).build(), ObjectValue.of(1)); - - enrichedStagingDataset = Selection.builder() - .source(selectionWithRowNumber) - .addAllFields(allColumns) - .condition(rowNumberFilterCondition) - .alias(stagingDataset.datasetReference().alias()) - .build(); - } - return enrichedStagingDataset; - } -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DeduplicationVisitors.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DeduplicationVisitors.java new file mode 100644 index 00000000000..27f2986faa0 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/DeduplicationVisitors.java
@@ -0,0 +1,71 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.deduplication;
+
+import java.util.Optional;
+
+import static org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicationHandler.COUNT;
+
+public class DeduplicationVisitors
+{
+    // Holder of stateless visitor constants; not meant to be instantiated.
+    private DeduplicationVisitors()
+    {
+    }
+
+    public static final DeduplicationStrategyVisitor<Optional<String>> EXTRACT_DEDUP_FIELD = new DeduplicationStrategyVisitor<Optional<String>>()
+    {
+        // Yields the COUNT column alias for strategies that add it, empty when duplicates are allowed.
+        @Override
+        public Optional<String> visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates)
+        {
+            return Optional.empty();
+        }
+
+        @Override
+        public Optional<String> visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates)
+        {
+            return Optional.of(COUNT);
+        }
+
+        @Override
+        public Optional<String> visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates)
+        {
+            return Optional.of(COUNT);
+        }
+    };
+
+    public static final DeduplicationStrategyVisitor<Boolean> IS_TEMP_TABLE_NEEDED = new DeduplicationStrategyVisitor<Boolean>()
+    {
+        // True for FilterDuplicates and FailOnDuplicates, false for AllowDuplicates.
+        @Override
+        public Boolean visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates)
+        {
+            return false;
+        }
+
+        @Override
+        public Boolean visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates)
+        {
+            return true;
+        }
+
+        @Override
+        public Boolean visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates)
+        {
+            return true;
+        }
+    };
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/AllVersionsStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/AllVersionsStrategyAbstract.java
new file mode 100644
index 00000000000..cf17b66861f
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/AllVersionsStrategyAbstract.java
@@ -0,0 +1,100 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.immutables.value.Value;
+
+import java.util.Optional;
+
+import static org.immutables.value.Value.Immutable;
+import static org.immutables.value.Value.Style;
+
+@Immutable
+@Style(
+    typeAbstract = "*Abstract",
+    typeImmutable = "*",
+    jdkOnly = true,
+    optionalAcceptNullable = true,
+    strictBuilder = true
+)
+public interface AllVersionsStrategyAbstract extends VersioningStrategy
+{
+    String DATA_SPLIT = "legend_persistence_data_split";
+
+    String versioningField();
+
+    @Value.Default
+    default VersioningOrder versioningOrder()
+    {
+        return VersioningOrder.ASC;
+    }
+
+    Optional<MergeDataVersionResolver> mergeDataVersionResolver();
+
+    @Value.Default
+    default boolean performStageVersioning()
+    {
+        return true;
+    }
+
+    @Value.Default
+    default String dataSplitFieldName()
+    {
+        return DATA_SPLIT;
+    }
+
+    @Override
+    default <T> T accept(VersioningStrategyVisitor<T> visitor)
+    {
+        return visitor.visitAllVersionsStrategy(this);
+    }
+
+    @Value.Check
+    default void validate()
+    {
+        // A VersionColumnBasedResolver must agree with the versioning order:
+        // ASC rejects the < and <= comparators, DESC rejects the > and >= comparators.
+        // The resolver has to accept() the visitor; merely constructing it would skip the check.
+        mergeDataVersionResolver().ifPresent(resolver -> resolver.accept(new MergeDataVersionResolverVisitor<Void>()
+        {
+            @Override
+            public Void visitDigestBasedResolver(DigestBasedResolverAbstract digestBasedResolver)
+            {
+                return null;
+            }
+
+            @Override
+            public Void visitVersionColumnBasedResolver(VersionColumnBasedResolverAbstract versionColumnBasedResolver)
+            {
+                if (versioningOrder().equals(VersioningOrder.ASC) &&
+                        (versionColumnBasedResolver.versionComparator().equals(VersionComparator.LESS_THAN) ||
+                                versionColumnBasedResolver.versionComparator().equals(VersionComparator.LESS_THAN_EQUAL_TO)))
+                {
+                    throw new IllegalStateException("Cannot build AllVersionsStrategy, Invalid comparator :" +
+                            versionColumnBasedResolver.versionComparator());
+                }
+
+                if (versioningOrder().equals(VersioningOrder.DESC) &&
+                        (versionColumnBasedResolver.versionComparator().equals(VersionComparator.GREATER_THAN) ||
+                                versionColumnBasedResolver.versionComparator().equals(VersionComparator.GREATER_THAN_EQUAL_TO)))
+                {
+                    throw new IllegalStateException("Cannot build AllVersionsStrategy, Invalid comparator :" +
+                            versionColumnBasedResolver.versionComparator());
+                }
+                return null;
+            }
+        }));
+    }
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DatasetVersioningHandler.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DatasetVersioningHandler.java
new file mode 100644
index 00000000000..a6179bca580
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DatasetVersioningHandler.java
@@ -0,0 +1,116 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition;
+import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection;
+import org.finos.legend.engine.persistence.components.logicalplan.values.*;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class DatasetVersioningHandler implements VersioningStrategyVisitor<Dataset>
+{
+    // Derives the dataset to ingest from `dataset` according to the visited versioning strategy.
+    Dataset dataset;
+    List<String> primaryKeys;
+
+    private static final String RANK = "legend_persistence_rank";
+
+    public DatasetVersioningHandler(Dataset dataset, List<String> primaryKeys)
+    {
+        this.dataset = dataset;
+        this.primaryKeys = primaryKeys;
+    }
+
+    @Override
+    public Dataset visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+    {
+        return this.dataset;
+    }
+
+    @Override
+    public Dataset visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+    {
+        if (!maxVersionStrategy.performStageVersioning())
+        {
+            return this.dataset;
+        }
+        OrderedField orderByField = OrderedField.builder()
+            .fieldName(maxVersionStrategy.versioningField())
+            .datasetRef(dataset.datasetReference())
+            .order(Order.DESC).build();
+        List<Value> allColumns = new ArrayList<>(dataset.schemaReference().fieldValues());
+        List<Value> allColumnsWithRank = new ArrayList<>(dataset.schemaReference().fieldValues());
+        List<Value> partitionFields = primaryKeys.stream()
+            .map(field -> FieldValue.builder().fieldName(field).datasetRef(dataset.datasetReference()).build())
+            .collect(Collectors.toList());
+        Value rank = WindowFunction.builder()
+            .windowFunction(FunctionImpl.builder().functionName(FunctionName.DENSE_RANK).build())
+            .addAllPartitionByFields(partitionFields)
+            .addOrderByFields(orderByField)
+            .alias(RANK)
+            .build();
+        allColumnsWithRank.add(rank);
+        Selection selectionWithRank = Selection.builder()
+            .source(dataset)
+            .addAllFields(allColumnsWithRank)
+            .alias(dataset.datasetReference().alias())
+            .build();
+
+        Condition rankFilterCondition = Equals.of(FieldValue.builder().fieldName(RANK).datasetRefAlias(dataset.datasetReference().alias()).build(), ObjectValue.of(1));
+
+        Dataset enrichedStagingDataset = Selection.builder()
+            .source(selectionWithRank)
+            .addAllFields(allColumns)
+            .condition(rankFilterCondition)
+            .build();
+
+        return enrichedStagingDataset;
+    }
+
+    @Override
+    public Dataset visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+    {
+        if (!allVersionsStrategyAbstract.performStageVersioning())
+        {
+            return this.dataset;
+        }
+        OrderedField orderByField = OrderedField.builder()
+            .fieldName(allVersionsStrategyAbstract.versioningField())
+            .datasetRef(dataset.datasetReference())
+            .order(allVersionsStrategyAbstract.versioningOrder() == VersioningOrder.DESC ? Order.DESC : Order.ASC).build();
+        List<Value> partitionFields = primaryKeys.stream()
+            .map(field -> FieldValue.builder().fieldName(field).datasetRef(dataset.datasetReference()).build())
+            .collect(Collectors.toList());
+        Value rank = WindowFunction.builder()
+            .windowFunction(FunctionImpl.builder().functionName(FunctionName.DENSE_RANK).build())
+            .addAllPartitionByFields(partitionFields)
+            .addOrderByFields(orderByField)
+            .alias(allVersionsStrategyAbstract.dataSplitFieldName())
+            .build();
+        List<Value> allColumnsWithRank = new ArrayList<>(dataset.schemaReference().fieldValues());
+        // The dense rank per primary key, ordered by the configured versioningOrder, becomes the data-split column.
+        allColumnsWithRank.add(rank);
+        Selection selectionWithRank = Selection.builder()
+            .source(dataset)
+            .addAllFields(allColumnsWithRank)
+            .build();
+        return selectionWithRank;
+    }
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java new file mode 100644 index 00000000000..86cd4c59e4e --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveDataErrorCheckLogicalPlan.java @@ -0,0 +1,115 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics;
+import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.*;
+import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue;
+import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl;
+import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName;
+import org.finos.legend.engine.persistence.components.logicalplan.values.Value;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Versioning-strategy visitor that builds the logical plan used to detect
+ * data errors, i.e. rows that share primary keys and version but differ in
+ * the remaining columns. Returns {@code null} when the strategy does not
+ * call for such a check.
+ */
+public class DeriveDataErrorCheckLogicalPlan implements VersioningStrategyVisitor
+{
+
+    // Primary-key field names; part of the GROUP BY.
+    List primaryKeys;
+    // Field names fed into COUNT(DISTINCT ...).
+    List remainingColumns;
+    // Dataset the check selects from.
+    Dataset tempStagingDataset;
+
+    /**
+     * @param primaryKeys        primary-key field names
+     * @param remainingColumns   field names counted as distinct per group
+     * @param tempStagingDataset dataset the check is run against
+     */
+    public DeriveDataErrorCheckLogicalPlan(List primaryKeys, List remainingColumns, Dataset tempStagingDataset)
+    {
+        this.primaryKeys = primaryKeys;
+        this.remainingColumns = remainingColumns;
+        this.tempStagingDataset = tempStagingDataset;
+    }
+
+    /**
+     * No versioning: there is no check to run, so {@code null} is returned.
+     */
+    @Override
+    public LogicalPlan visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+    {
+        return null;
+    }
+
+    /**
+     * @return the data error check plan keyed on the strategy's versioning
+     *         field, or {@code null} when stage versioning is disabled
+     */
+    @Override
+    public LogicalPlan visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+    {
+        if (maxVersionStrategy.performStageVersioning())
+        {
+            return getLogicalPlanForDataErrorCheck(maxVersionStrategy.versioningField());
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    /**
+     * @return the data error check plan keyed on the strategy's versioning
+     *         field, or {@code null} when stage versioning is disabled
+     */
+    @Override
+    public LogicalPlan visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+    {
+        if (allVersionsStrategyAbstract.performStageVersioning())
+        {
+            return getLogicalPlanForDataErrorCheck(allVersionsStrategyAbstract.versioningField());
+        }
+        else
+        {
+            return null;
+        }
+    }
+
+    /**
+     * Builds a plan with a single operation made of two nested selections:
+     * the inner one groups the temp staging dataset by primary keys plus
+     * {@code versionField} and computes COUNT(DISTINCT(remaining columns))
+     * under the alias {@code legend_persistence_distinct_rows}; the outer one
+     * takes the MAX of that count, aliased with the name of
+     * {@link DedupAndVersionErrorStatistics#MAX_DATA_ERRORS}.
+     *
+     * @param versionField name of the versioning field added to the grouping
+     * @return the logical plan containing the outer selection
+     */
+    private LogicalPlan getLogicalPlanForDataErrorCheck(String versionField)
+    {
+        String maxDataErrorAlias = DedupAndVersionErrorStatistics.MAX_DATA_ERRORS.name();
+        String distinctRowCount = "legend_persistence_distinct_rows";
+        // Grouping key: every primary key followed by the version field.
+        List pKsAndVersion = new ArrayList<>();
+        for (String pk: primaryKeys)
+        {
+            pKsAndVersion.add(FieldValue.builder().fieldName(pk).build());
+        }
+        pKsAndVersion.add(FieldValue.builder().fieldName(versionField).build());
+
+        List distinctValueFields = new ArrayList<>();
+        for (String field: remainingColumns)
+        {
+            distinctValueFields.add(FieldValue.builder().fieldName(field).build());
+        }
+
+        FunctionImpl countDistinct = FunctionImpl.builder()
+            .functionName(FunctionName.COUNT)
+            .addValue(FunctionImpl.builder().functionName(FunctionName.DISTINCT).addAllValue(distinctValueFields).build())
+            .alias(distinctRowCount)
+            .build();
+
+        Selection selectCountDataError = Selection.builder()
+            .source(tempStagingDataset)
+            .groupByFields(pKsAndVersion)
+            .addFields(countDistinct)
+            .alias(tempStagingDataset.datasetReference().alias())
+            .build();
+        FunctionImpl maxCount = FunctionImpl.builder()
+            .functionName(FunctionName.MAX)
+            .addValue(FieldValue.builder().fieldName(distinctRowCount).build())
+            .alias(maxDataErrorAlias)
+            .build();
+        Selection maxDataErrorCount = Selection.builder()
+            .source(selectCountDataError)
+            .addFields(maxCount)
+            .build();
+        return LogicalPlan.builder().addOps(maxDataErrorCount).build();
+    }
+
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveTempStagingSchemaDefinition.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveTempStagingSchemaDefinition.java
new file mode 100644
index 00000000000..b7892f3e7ef
--- /dev/null
+++
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DeriveTempStagingSchemaDefinition.java
@@ -0,0 +1,107 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.finos.legend.engine.persistence.components.ingestmode.deduplication.*;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType;
+import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+import static org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicationHandler.COUNT;
+
+/**
+ * Versioning-strategy visitor that derives the schema of the temp staging
+ * dataset from the staging schema: the staging fields, plus a count field
+ * when the deduplication strategy needs one, plus a data split field when an
+ * all-versions strategy performs stage versioning.
+ *
+ * NOTE(review): every visit method appends to the shared builder, and
+ * visitAllVersionsStrategy also mutates the field list, so an instance looks
+ * intended for a single visit - confirm against callers.
+ */
+public class DeriveTempStagingSchemaDefinition implements VersioningStrategyVisitor
+{
+    DeduplicationStrategy deduplicationStrategy;
+    // Pre-populated with the staging schema's indexes, shard and column-store specifications.
+    private SchemaDefinition.Builder schemaDefBuilder;
+    // Mutable copy of the staging fields, extended with any extra fields.
+    private List schemaFields;
+
+    // True when at least one staging field is flagged as primary key.
+    boolean anyPKInStaging;
+
+    /**
+     * @param stagingSchema         schema the temp staging schema is derived from
+     * @param deduplicationStrategy strategy asked whether an extra field is needed
+     */
+    public DeriveTempStagingSchemaDefinition(SchemaDefinition stagingSchema, DeduplicationStrategy deduplicationStrategy)
+    {
+        this.deduplicationStrategy = deduplicationStrategy;
+        this.schemaDefBuilder = SchemaDefinition.builder()
+            .addAllIndexes(stagingSchema.indexes())
+            .shardSpecification(stagingSchema.shardSpecification())
+            .columnStoreSpecification(stagingSchema.columnStoreSpecification());
+        anyPKInStaging = stagingSchema.fields().stream().anyMatch(field -> field.primaryKey());
+        this.schemaFields = new ArrayList<>(stagingSchema.fields());
+        // Append the count field only for strategies that report one.
+        Optional fieldToAddForDedup = deduplicationStrategy.accept(GET_FIELD_NEEDED_FOR_DEDUPLICATION);
+        fieldToAddForDedup.ifPresent(this.schemaFields::add);
+    }
+
+    /**
+     * @return schema built from the fields collected in the constructor
+     */
+    @Override
+    public SchemaDefinition visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy)
+    {
+
+        return schemaDefBuilder.addAllFields(schemaFields).build();
+    }
+
+    /**
+     * @return schema built from the fields collected in the constructor;
+     *         max-version adds no field of its own
+     */
+    @Override
+    public SchemaDefinition visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy)
+    {
+        return schemaDefBuilder.addAllFields(schemaFields).build();
+    }
+
+    /**
+     * When stage versioning is enabled, appends an INT field named after the
+     * strategy's data split field name; it is flagged as primary key exactly
+     * when the staging schema has any primary key.
+     *
+     * @return schema built from the collected fields
+     */
+    @Override
+    public SchemaDefinition visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract)
+    {
+        if (allVersionsStrategyAbstract.performStageVersioning())
+        {
+            Field dataSplit = Field.builder().name(allVersionsStrategyAbstract.dataSplitFieldName())
+                .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty()))
+                .primaryKey(anyPKInStaging)
+                .build();
+            schemaFields.add(dataSplit);
+        }
+        return schemaDefBuilder.addAllFields(schemaFields).build();
+    }
+
+    /**
+     * Deduplication visitor yielding the extra field a strategy needs in the
+     * temp staging schema: nothing for allow-duplicates, and a non-primary-key
+     * INT field named {@code COUNT} for filter-duplicates and fail-on-duplicates.
+     */
+    public static final DeduplicationStrategyVisitor> GET_FIELD_NEEDED_FOR_DEDUPLICATION = new DeduplicationStrategyVisitor>()
+    {
+        @Override
+        public Optional visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates)
+        {
+            return Optional.empty();
+        }
+
+        @Override
+        public Optional visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates)
+        {
+            return getDedupField();
+        }
+
+        @Override
+        public Optional visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates)
+        {
+            return getDedupField();
+        }
+
+        // Builds the count field shared by the filter and fail strategies.
+        private Optional getDedupField()
+        {
+            Field count = Field.builder().name(COUNT)
+                .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty()))
+                .primaryKey(false)
+                .build();
+            return Optional.of(count);
+        }
+
+    };
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DigestBasedResolverAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DigestBasedResolverAbstract.java
new file mode 100644
index 00000000000..6f5f549e87d
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/DigestBasedResolverAbstract.java
@@ -0,0 +1,37 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.immutables.value.Value;
+
+/**
+ * Merge data version resolver variant without attributes of its own; it
+ * dispatches visitors to {@code visitDigestBasedResolver}.
+ */
+@Value.Immutable
+@Value.Style(
+    typeAbstract = "*Abstract",
+    typeImmutable = "*",
+    jdkOnly = true,
+    optionalAcceptNullable = true,
+    strictBuilder = true
+)
+public interface DigestBasedResolverAbstract extends MergeDataVersionResolver
+{
+
+    // Shared instance, built once through the DigestBasedResolver builder.
+    org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver INSTANCE = org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver.builder().build();
+
+    /**
+     * @param visitor visitor to dispatch to
+     * @return whatever {@code visitor.visitDigestBasedResolver(this)} returns
+     */
+    @Override
+    default T accept(MergeDataVersionResolverVisitor visitor)
+    {
+        return visitor.visitDigestBasedResolver(this);
+    }
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MaxVersionStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MaxVersionStrategyAbstract.java
new file mode 100644
index 00000000000..53c9cbe5863
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MaxVersionStrategyAbstract.java
@@ -0,0 +1,77 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+import org.immutables.value.Value;
+
+import java.util.Optional;
+
+import static org.immutables.value.Value.Immutable;
+import static org.immutables.value.Value.Style;
+
+/**
+ * Versioning strategy identified by a versioning field, with an optional
+ * merge data version resolver and a flag controlling stage versioning.
+ * Visitors are dispatched to {@code visitMaxVersionStrategy}.
+ */
+@Immutable
+@Style(
+    typeAbstract = "*Abstract",
+    typeImmutable = "*",
+    jdkOnly = true,
+    optionalAcceptNullable = true,
+    strictBuilder = true
+)
+public interface MaxVersionStrategyAbstract extends VersioningStrategy
+{
+    /**
+     * @return name of the field holding the version
+     */
+    @Value.Parameter(order = 0)
+    String versioningField();
+
+    /**
+     * @return the resolver to use, if one was configured
+     */
+    Optional mergeDataVersionResolver();
+
+    /**
+     * @return whether stage versioning is performed; defaults to {@code true}
+     */
+    @Value.Default
+    default boolean performStageVersioning()
+    {
+        return true;
+    }
+
+    @Override
+    default T accept(VersioningStrategyVisitor visitor)
+    {
+        return visitor.visitMaxVersionStrategy(this);
+    }
+
+
+    /**
+     * Rejects a version-column based resolver configured with
+     * {@code LESS_THAN} or {@code LESS_THAN_EQUAL_TO}; digest based
+     * resolvers and an absent resolver are accepted.
+     *
+     * @throws IllegalStateException when the comparator is not allowed
+     */
+    @Value.Check
+    default void validate()
+    {
+        // For VersionColumnBasedResolver, allowed comparators: > , >=
+        // The visitor must be passed to accept(...); constructing it alone never runs the check.
+        mergeDataVersionResolver().ifPresent(mergeDataVersionResolver -> mergeDataVersionResolver.accept(new MergeDataVersionResolverVisitor()
+        {
+            @Override
+            public Void visitDigestBasedResolver(DigestBasedResolverAbstract digestBasedResolver)
+            {
+                return null;
+            }
+
+            @Override
+            public Void visitVersionColumnBasedResolver(VersionColumnBasedResolverAbstract versionColumnBasedResolver)
+            {
+                if (versionColumnBasedResolver.versionComparator().equals(VersionComparator.LESS_THAN) ||
+                    versionColumnBasedResolver.versionComparator().equals(VersionComparator.LESS_THAN_EQUAL_TO))
+                {
+                    throw new IllegalStateException("Cannot build MaxVersionStrategy, Invalid comparator :" +
+                        versionColumnBasedResolver.versionComparator());
+                }
+                return null;
+            }
+        }));
+    }
+}
\ No newline at end of file
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MergeDataVersionResolver.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MergeDataVersionResolver.java
new file mode 100644
index 00000000000..c647c2b71db
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MergeDataVersionResolver.java
@@ -0,0 +1,20 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+/**
+ * Visitable contract implemented by the merge data version resolver variants.
+ */
+public interface MergeDataVersionResolver
+{
+    /**
+     * Dispatches to the visitor method matching the concrete resolver.
+     *
+     * @param visitor visitor to dispatch to
+     * @return the value produced by the visitor
+     */
+    T accept(MergeDataVersionResolverVisitor visitor);
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MergeDataVersionResolverVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MergeDataVersionResolverVisitor.java
new file mode 100644
index 00000000000..9c57571c4a7
--- /dev/null
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/MergeDataVersionResolverVisitor.java
@@ -0,0 +1,22 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+/**
+ * Visitor over the merge data version resolver variants: one method per
+ * resolver type.
+ */
+public interface MergeDataVersionResolverVisitor
+{
+    /**
+     * Called for a digest based resolver.
+     */
+    T visitDigestBasedResolver(DigestBasedResolverAbstract digestBasedResolver);
+
+    /**
+     * Called for a version-column based resolver.
+     */
+    T visitVersionColumnBasedResolver(VersionColumnBasedResolverAbstract versionColumnBasedResolver);
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/NoVersioningStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/NoVersioningStrategyAbstract.java
similarity index 88%
rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/NoVersioningStrategyAbstract.java
rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/NoVersioningStrategyAbstract.java
index 910c79836c0..ec3ae32ae8e 100644
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/NoVersioningStrategyAbstract.java
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/NoVersioningStrategyAbstract.java
@@ -12,13 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-package org.finos.legend.engine.persistence.components.ingestmode.deduplication; +package org.finos.legend.engine.persistence.components.ingestmode.versioning; -import static org.immutables.value.Value.Immutable; -import static org.immutables.value.Value.Style; +import org.immutables.value.Value; -@Immutable -@Style( +@Value.Immutable +@Value.Style( typeAbstract = "*Abstract", typeImmutable = "*", jdkOnly = true, diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/MaxVersionStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersionColumnBasedResolverAbstract.java similarity index 60% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/MaxVersionStrategyAbstract.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersionColumnBasedResolverAbstract.java index 1f0763c5d8b..a64f1d6f97b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/MaxVersionStrategyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersionColumnBasedResolverAbstract.java @@ -12,41 +12,26 @@ // 
See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.ingestmode.deduplication; +package org.finos.legend.engine.persistence.components.ingestmode.versioning; import org.immutables.value.Value; -import static org.immutables.value.Value.Immutable; -import static org.immutables.value.Value.Style; - -@Immutable -@Style( +@Value.Immutable +@Value.Style( typeAbstract = "*Abstract", typeImmutable = "*", jdkOnly = true, optionalAcceptNullable = true, strictBuilder = true ) -public interface MaxVersionStrategyAbstract extends VersioningStrategy +public interface VersionColumnBasedResolverAbstract extends MergeDataVersionResolver { @Value.Parameter(order = 0) - String versioningField(); - - @Value.Default - default VersioningComparator versioningComparator() - { - return VersioningComparator.GREATER_THAN; - } - - @Value.Default - default boolean performDeduplication() - { - return true; - } + VersionComparator versionComparator(); @Override - default T accept(VersioningStrategyVisitor visitor) + default T accept(MergeDataVersionResolverVisitor visitor) { - return visitor.visitMaxVersionStrategy(this); + return visitor.visitVersionColumnBasedResolver(this); } } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersionComparator.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersionComparator.java new file mode 100644 index 00000000000..f53db8d3248 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersionComparator.java
@@ -0,0 +1,23 @@
+// Copyright 2023 Goldman Sachs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.finos.legend.engine.persistence.components.ingestmode.versioning;
+
+/**
+ * Comparison operators a version-column based resolver can be configured with.
+ */
+public enum VersionComparator
+{
+    GREATER_THAN,
+    GREATER_THAN_EQUAL_TO,
+    LESS_THAN,
+    LESS_THAN_EQUAL_TO;
+}
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningConditionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningConditionVisitor.java
similarity index 61%
rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningConditionVisitor.java
rename to
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningConditionVisitor.java index c560799045d..28514fad3fa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningConditionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningConditionVisitor.java @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.ingestmode.deduplication; +package org.finos.legend.engine.persistence.components.ingestmode.versioning; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan; @@ -43,23 +43,50 @@ public VersioningConditionVisitor(Dataset mainDataset, Dataset stagingDataset, b @Override public Condition visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) { - if (invertComparison) + return getDigestBasedVersioningCondition(); + } + + @Override + public Condition visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + { + MergeDataVersionResolver versionResolver = maxVersionStrategy.mergeDataVersionResolver().orElseThrow(IllegalStateException::new); + return versionResolver.accept(new VersioningCondition(maxVersionStrategy.versioningField())); + } + + @Override + public Condition visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategy) + { + MergeDataVersionResolver versionResolver = 
allVersionsStrategy.mergeDataVersionResolver().orElseThrow(IllegalStateException::new); + return versionResolver.accept(new VersioningCondition(allVersionsStrategy.versioningField())); + } + + private class VersioningCondition implements MergeDataVersionResolverVisitor + { + private String versioningField; + + public VersioningCondition(String versioningField) { - return LogicalPlanUtils.getDigestMatchCondition(mainDataset, stagingDataset, digestField); + this.versioningField = versioningField; } - else + + @Override + public Condition visitDigestBasedResolver(DigestBasedResolverAbstract digestBasedResolver) { - return LogicalPlanUtils.getDigestDoesNotMatchCondition(mainDataset, stagingDataset, digestField); + return getDigestBasedVersioningCondition(); + } + + @Override + public Condition visitVersionColumnBasedResolver(VersionColumnBasedResolverAbstract versionColumnBasedResolver) + { + FieldValue mainVersioningField = FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(versioningField).build(); + FieldValue stagingVersioningField = FieldValue.builder().datasetRef(stagingDataset.datasetReference()).fieldName(versioningField).build(); + return getVersioningCondition(mainVersioningField, stagingVersioningField, versionColumnBasedResolver.versionComparator()); } } - @Override - public Condition visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + private Condition getVersioningCondition(FieldValue mainVersioningField, FieldValue stagingVersioningField, VersionComparator versionComparator) { - FieldValue mainVersioningField = FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(maxVersionStrategy.versioningField()).build(); - FieldValue stagingVersioningField = FieldValue.builder().datasetRef(stagingDataset.datasetReference()).fieldName(maxVersionStrategy.versioningField()).build(); - - switch (maxVersionStrategy.versioningComparator()) + switch (versionComparator) { case GREATER_THAN: if 
(invertComparison) @@ -83,4 +110,16 @@ public Condition visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionSt throw new IllegalStateException("Unsupported versioning comparator type"); } } + + private Condition getDigestBasedVersioningCondition() + { + if (invertComparison) + { + return LogicalPlanUtils.getDigestMatchCondition(mainDataset, stagingDataset, digestField); + } + else + { + return LogicalPlanUtils.getDigestDoesNotMatchCondition(mainDataset, stagingDataset, digestField); + } + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningComparator.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningOrder.java similarity index 87% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningComparator.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningOrder.java index fe1aee3c611..20d70cb8262 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningComparator.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningOrder.java @@ -12,10 
+12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.ingestmode.deduplication; +package org.finos.legend.engine.persistence.components.ingestmode.versioning; -public enum VersioningComparator +public enum VersioningOrder { - GREATER_THAN, - GREATER_THAN_EQUAL_TO + ASC, DESC } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningStrategy.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningStrategy.java similarity index 97% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningStrategy.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningStrategy.java index dc1ce0e2ada..fd503e4ec87 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningStrategy.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningStrategy.java @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the 
License. -package org.finos.legend.engine.persistence.components.ingestmode.deduplication; +package org.finos.legend.engine.persistence.components.ingestmode.versioning; public interface VersioningStrategy { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningStrategyVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningStrategyVisitor.java similarity index 89% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningStrategyVisitor.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningStrategyVisitor.java index 8aa8af545b7..c3e38ce1bd3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/deduplication/VersioningStrategyVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningStrategyVisitor.java @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package org.finos.legend.engine.persistence.components.ingestmode.deduplication; +package org.finos.legend.engine.persistence.components.ingestmode.versioning; public interface VersioningStrategyVisitor { T visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy); T visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy); + + T visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningVisitors.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningVisitors.java new file mode 100644 index 00000000000..9276d441340 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/versioning/VersioningVisitors.java @@ -0,0 +1,88 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.versioning; + +import java.util.Optional; + +public class VersioningVisitors +{ + + public static final VersioningStrategyVisitor> EXTRACT_DATA_SPLIT_FIELD = new VersioningStrategyVisitor>() + { + @Override + public Optional visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) + { + return Optional.empty(); + } + + @Override + public Optional visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + { + return Optional.empty(); + } + + @Override + public Optional visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract) + { + return Optional.of(allVersionsStrategyAbstract.dataSplitFieldName()); + } + }; + + public static final VersioningStrategyVisitor IS_TEMP_TABLE_NEEDED = new VersioningStrategyVisitor() + { + + @Override + public Boolean visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) + { + return false; + } + + @Override + public Boolean visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + { + return maxVersionStrategy.performStageVersioning(); + } + + @Override + public Boolean visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract) + { + return allVersionsStrategyAbstract.performStageVersioning(); + } + }; + + public static final VersioningStrategyVisitor> EXTRACT_VERSIONING_FIELD = new VersioningStrategyVisitor>() + { + @Override + public Optional visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) + { + return Optional.empty(); + } + + @Override + public Optional visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + { + return Optional.of(maxVersionStrategy.versioningField()); + } + + @Override + public Optional visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract) + { + return Optional.of(allVersionsStrategyAbstract.versioningField()); + } + }; + + + +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java index b16e938fbc8..f841210323d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java @@ -122,4 +122,13 @@ public static LogicalPlan getLogicalPlanForMinAndMaxForField(Dataset dataset, St .source(dataset).build(); return LogicalPlan.builder().addOps(selection).build(); } + + public static LogicalPlan getLogicalPlanForMaxOfField(Dataset dataset, String fieldName) + { + FieldValue field = FieldValue.builder().datasetRef(dataset.datasetReference()).fieldName(fieldName).build(); + Selection selection = Selection.builder() + .addFields(FunctionImpl.builder().functionName(FunctionName.MAX).addValue(field).alias(MAX_OF_FIELD).build()) + .source(dataset).build(); + return LogicalPlan.builder().addOps(selection).build(); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java index 03c778e940d..917525b2fc8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SelectionAbstract.java @@ -17,11 +17,13 @@ import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.quantifiers.Quantifier; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.immutables.value.Value.Derived; import org.immutables.value.Value.Immutable; import org.immutables.value.Value.Style; +import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -56,4 +58,22 @@ default DatasetReference datasetReference() .alias(alias()) .build(); } + + @Derived + default SchemaReference schemaReference() + { + List list = new ArrayList<>(); + for (Value value: fields()) + { + if (value instanceof FieldValue) + { + list.add((FieldValue) value); + } + else if (value.alias().isPresent()) + { + list.add(FieldValue.builder().fieldName(value.alias().get()).alias(value.alias()).datasetRef(datasetReference()).build()); + } + } + return SchemaReference.builder().addAllFieldValues(list).build(); + } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java index 706830f613f..ac3a3e047fe 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java @@ -20,6 +20,7 @@ public enum FunctionName MAX, MIN, COUNT, + DISTINCT, COALESCE, CURRENT_TIME, CURRENT_DATE, @@ -27,6 +28,7 @@ public enum FunctionName CURRENT_TIMESTAMP, UPPER, ROW_NUMBER, + DENSE_RANK, SUBSTRING, PARSE_JSON, DATE, diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java index 0476ee62baf..c2227940d49 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java @@ -22,10 +22,6 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditingAbstract; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditingAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategyVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicatesAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicatesAbstract; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; @@ -34,9 +30,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; @@ -44,12 +38,12 @@ 
import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.function.Consumer; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.ALL_COLUMNS; @@ -64,13 +58,20 @@ class AppendOnlyPlanner extends Planner { super(datasets, ingestMode, plannerOptions, capabilities); - // validate - ingestMode.deduplicationStrategy().accept(new ValidatePrimaryKeys(primaryKeys, this::validatePrimaryKeysIsEmpty, - this::validatePrimaryKeysNotEmpty, ingestMode.dataSplitField().isPresent())); - // if data splits are present, then audit Column must be a PK - if (ingestMode.dataSplitField().isPresent()) + // Validation + // 1. If primary keys are present, then auditing must be turned on and the auditing column must be one of the primary keys + if (!primaryKeys.isEmpty()) { - ingestMode.auditing().accept(ValidateAuditingForDataSplits); + ingestMode.auditing().accept(new ValidateAuditingForPrimaryKeys(mainDataset())); + } + + // 2. 
For filterExistingRecords, we must have digest and primary keys to filter them + if (ingestMode.filterExistingRecords()) + { + if (!ingestMode.digestField().isPresent() || primaryKeys.isEmpty()) + { + throw new IllegalStateException("Primary keys and digest are mandatory for filterExistingRecords"); + } } this.dataSplitInRangeCondition = ingestMode.dataSplitField().map(field -> LogicalPlanUtils.getDataSplitInRangeCondition(stagingDataset(), field)); @@ -85,14 +86,9 @@ protected AppendOnly ingestMode() @Override public LogicalPlan buildLogicalPlanForIngest(Resources resources) { - List fieldsToSelect = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); - List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); - - if (ingestMode().dataSplitField().isPresent()) - { - LogicalPlanUtils.removeField(fieldsToSelect, ingestMode().dataSplitField().get()); - LogicalPlanUtils.removeField(fieldsToInsert, ingestMode().dataSplitField().get()); - } + List dataFields = getDataFields(); + List fieldsToSelect = new ArrayList<>(dataFields); + List fieldsToInsert = new ArrayList<>(dataFields); if (ingestMode().auditing().accept(AUDIT_ENABLED)) { @@ -104,237 +100,114 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) } else if (!ingestMode().dataSplitField().isPresent()) { + // this is just to print a "*" when we are in the simplest case (no auditing, no data split) fieldsToSelect = LogicalPlanUtils.ALL_COLUMNS(); } - Dataset selectStage = ingestMode().deduplicationStrategy().accept(new SelectStageDatasetBuilder( - mainDataset(), stagingDataset(), ingestMode(), primaryKeys, dataSplitInRangeCondition, fieldsToSelect)); + Dataset selectStage = ingestMode().filterExistingRecords() ? 
getSelectStageWithFilterExistingRecords(fieldsToSelect) : getSelectStage(fieldsToSelect); return LogicalPlan.of(Collections.singletonList(Insert.of(mainDataset(), selectStage, fieldsToInsert))); } @Override - public LogicalPlan buildLogicalPlanForPreActions(Resources resources) + List getDigestOrRemainingColumns() { - List operations = new ArrayList<>(); - operations.add(Create.of(true, mainDataset())); - if (options().createStagingDataset()) + List remainingCols = new ArrayList<>(); + if (ingestMode().digestField().isPresent()) { - operations.add(Create.of(true, stagingDataset())); + remainingCols = Arrays.asList(ingestMode().digestField().get()); } - if (options().enableConcurrentSafety()) + else if (!primaryKeys.isEmpty()) { - operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + remainingCols = getNonPKNonVersionDataFields(); } - return LogicalPlan.of(operations); - } - - protected void addPostRunStatsForRowsInserted(Map postRunStatisticsResult) - { - Optional dataSplitInRangeCondition = dataSplitExecutionSupported() ? 
getDataSplitInRangeConditionForStatistics() : Optional.empty(); - ingestMode().deduplicationStrategy().accept(new PopulatePostRunStatisticsBreakdown(ingestMode(), mainDataset(), stagingDataset(), postRunStatisticsResult, dataSplitInRangeCondition)); - } - - public Optional getDataSplitInRangeConditionForStatistics() - { - return dataSplitInRangeCondition; + return remainingCols; } - private AuditingVisitor ValidateAuditingForDataSplits = new AuditingVisitor() + private Dataset getSelectStage(List fieldsToSelect) { - @Override - public Void visitNoAuditing(NoAuditingAbstract noAuditing) + if (ingestMode().dataSplitField().isPresent()) { - throw new IllegalStateException("DataSplits not supported for NoAuditing mode"); + return Selection.builder().source(stagingDataset()).condition(dataSplitInRangeCondition).addAllFields(fieldsToSelect).build(); } - - @Override - public Void visitDateTimeAuditing(DateTimeAuditingAbstract dateTimeAuditing) + else { - // For Data splits, audit column must be a PK - Field dateTimeAuditingField = mainDataset().schema().fields().stream() - .filter(field -> field.name().equalsIgnoreCase(dateTimeAuditing.dateTimeField())) - .findFirst().orElseThrow(() -> new IllegalStateException("dateTimeField is mandatory Field for dateTimeAuditing mode")); - if (!dateTimeAuditingField.primaryKey()) - { - throw new IllegalStateException("dateTimeField must be a Primary Key for Data Splits"); - } - return null; + return Selection.builder().source(stagingDataset()).addAllFields(fieldsToSelect).build(); } - }; + } - static class ValidatePrimaryKeys implements DeduplicationStrategyVisitor + private Dataset getSelectStageWithFilterExistingRecords(List fieldsToSelect) { - final List primaryKeys; - final Consumer> validatePrimaryKeysIsEmpty; - final Consumer> validatePrimaryKeysNotEmpty; - final boolean dataSplitsEnabled; - - ValidatePrimaryKeys(List primaryKeys, Consumer> validatePrimaryKeysIsEmpty, Consumer> validatePrimaryKeysNotEmpty, boolean 
dataSplitsEnabled) - { - this.primaryKeys = primaryKeys; - this.validatePrimaryKeysIsEmpty = validatePrimaryKeysIsEmpty; - this.validatePrimaryKeysNotEmpty = validatePrimaryKeysNotEmpty; - this.dataSplitsEnabled = dataSplitsEnabled; - } - - @Override - public Void visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) + Condition notExistInSinkCondition = Not.of(Exists.of(Selection.builder() + .source(mainDataset()) + .condition(And.builder() + .addConditions( + getPrimaryKeyMatchCondition(mainDataset(), stagingDataset(), primaryKeys.toArray(new String[0])), + getDigestMatchCondition(mainDataset(), stagingDataset(), ingestMode().digestField().orElseThrow(IllegalStateException::new))) + .build()) + .addAllFields(ALL_COLUMNS()) + .build())); + + Condition selectCondition; + if (ingestMode().dataSplitField().isPresent()) { - // If data splits are enabled, then PKs are allowed, Otherwise PKs are not allowed - if (!dataSplitsEnabled) - { - validatePrimaryKeysIsEmpty.accept(primaryKeys); - } - return null; + selectCondition = And.builder().addConditions(dataSplitInRangeCondition.orElseThrow(IllegalStateException::new), notExistInSinkCondition).build(); } - - @Override - public Void visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates) + else { - validatePrimaryKeysNotEmpty.accept(primaryKeys); - return null; + selectCondition = notExistInSinkCondition; } - @Override - public Void visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) - { - validatePrimaryKeysNotEmpty.accept(primaryKeys); - return null; - } + return Selection.builder().source(stagingDataset()).condition(selectCondition).addAllFields(fieldsToSelect).build(); } - static class SelectStageDatasetBuilder implements DeduplicationStrategyVisitor + protected void addPostRunStatsForRowsInserted(Map postRunStatisticsResult) { - final Dataset mainDataset; - final Dataset stagingDataset; - final AppendOnly ingestMode; - final List primaryKeys; - final Optional dataSplitInRangeCondition; - 
- final List fieldsToSelect; - - SelectStageDatasetBuilder(Dataset mainDataset, Dataset stagingDataset, AppendOnly ingestMode, List primaryKeys, Optional dataSplitInRangeCondition, List fieldsToSelect) - { - this.mainDataset = mainDataset; - this.stagingDataset = stagingDataset; - this.ingestMode = ingestMode; - this.primaryKeys = primaryKeys; - this.dataSplitInRangeCondition = dataSplitInRangeCondition; - this.fieldsToSelect = fieldsToSelect; - } - - @Override - public Dataset visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) - { - return selectStageDatasetWithoutDuplicateFiltering(); - } - - @Override - public Dataset visitFilterDuplicates(FilterDuplicatesAbstract filterDuplicates) + if (ingestMode().auditing().accept(AUDIT_ENABLED)) { - Condition notExistInSinkCondition = Not.of(Exists.of(Selection.builder() - .source(mainDataset) - .condition(And.builder() - .addConditions( - getPrimaryKeyMatchCondition(mainDataset, stagingDataset, primaryKeys.toArray(new String[0])), - getDigestMatchCondition(mainDataset, stagingDataset, ingestMode.digestField().orElseThrow(IllegalStateException::new))) - .build()) - .addAllFields(ALL_COLUMNS()) - .build())); - - Condition selectCondition; - if (ingestMode.dataSplitField().isPresent()) - { - selectCondition = And.builder().addConditions(dataSplitInRangeCondition.orElseThrow(IllegalStateException::new), notExistInSinkCondition).build(); - } - else - { - selectCondition = notExistInSinkCondition; - } - - return Selection.builder().source(stagingDataset).condition(selectCondition).addAllFields(fieldsToSelect).build(); + // Rows inserted = rows in main with audit column equals latest timestamp + String auditField = ingestMode().auditing().accept(AuditingVisitors.EXTRACT_AUDIT_FIELD).orElseThrow(IllegalStateException::new); + postRunStatisticsResult.put(ROWS_INSERTED, LogicalPlan.builder() + .addOps(LogicalPlanUtils.getRowsBasedOnLatestTimestamp(mainDataset(), auditField, ROWS_INSERTED.get())) + .build()); } - - 
@Override - public Dataset visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) + else { - return selectStageDatasetWithoutDuplicateFiltering(); + // Not supported at the moment } + } - private Dataset selectStageDatasetWithoutDuplicateFiltering() - { - if (ingestMode.dataSplitField().isPresent() && !primaryKeys.isEmpty()) - { - return Selection.builder().source(stagingDataset).condition(dataSplitInRangeCondition).addAllFields(fieldsToSelect).build(); - } - else - { - return Selection.builder().source(stagingDataset).addAllFields(fieldsToSelect).build(); - } - } + public Optional getDataSplitInRangeConditionForStatistics() + { + return dataSplitInRangeCondition; } - static class PopulatePostRunStatisticsBreakdown implements DeduplicationStrategyVisitor + static class ValidateAuditingForPrimaryKeys implements AuditingVisitor { - final AppendOnly ingestMode; final Dataset mainDataset; - final Dataset stagingDataset; - final Map postRunStatisticsResult; - Optional dataSplitInRangeCondition; - PopulatePostRunStatisticsBreakdown(AppendOnly ingestMode, Dataset mainDataset, Dataset stagingDataset, Map postRunStatisticsResult, Optional dataSplitInRangeCondition) + ValidateAuditingForPrimaryKeys(Dataset mainDataset) { - this.ingestMode = ingestMode; this.mainDataset = mainDataset; - this.stagingDataset = stagingDataset; - this.postRunStatisticsResult = postRunStatisticsResult; - this.dataSplitInRangeCondition = dataSplitInRangeCondition; } @Override - public Void visitAllowDuplicates(AllowDuplicatesAbstract allowDuplicates) - { - return populateInsertedRecordsCountUsingStagingDataset(); - } - - @Override - public Void visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) + public Void visitNoAuditing(NoAuditingAbstract noAuditing) { - return populateInsertedRecordsCountUsingStagingDataset(); + throw new IllegalStateException("NoAuditing not allowed when there are primary keys"); } @Override - public Void visitFilterDuplicates(FilterDuplicatesAbstract 
filterDuplicates) + public Void visitDateTimeAuditing(DateTimeAuditingAbstract dateTimeAuditing) { - if (ingestMode.auditing().accept(AUDIT_ENABLED)) - { - // Rows inserted = rows in main with audit column equals latest timestamp - String auditField = ingestMode.auditing().accept(AuditingVisitors.EXTRACT_AUDIT_FIELD).orElseThrow(IllegalStateException::new); - postRunStatisticsResult.put(ROWS_INSERTED, LogicalPlan.builder() - .addOps(LogicalPlanUtils.getRowsBasedOnLatestTimestamp(mainDataset, auditField, ROWS_INSERTED.get())) - .build()); - } - else + Field dateTimeAuditingField = mainDataset.schema().fields().stream() + .filter(field -> field.name().equalsIgnoreCase(dateTimeAuditing.dateTimeField())) + .findFirst().orElseThrow(() -> new IllegalStateException("dateTimeField is mandatory Field for dateTimeAuditing mode")); + if (!dateTimeAuditingField.primaryKey()) { - // Not supported at the moment + throw new IllegalStateException("auditing dateTimeField must be a primary key when there are other primary keys"); } return null; } - - private Void populateInsertedRecordsCountUsingStagingDataset() - { - LogicalPlan incomingRecordCountPlan = LogicalPlan.builder() - .addOps(LogicalPlanUtils.getRecordCount(stagingDataset, ROWS_INSERTED.get(), dataSplitInRangeCondition)) - .build(); - postRunStatisticsResult.put(ROWS_INSERTED, incomingRecordCountPlan); - return null; - } - } - - @Override - public boolean dataSplitExecutionSupported() - { - return !primaryKeys.isEmpty(); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java index 93a97abfec8..10502d86c7e 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java @@ -18,7 +18,6 @@ import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BitemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitors; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromAndThruDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromDateTime; @@ -109,7 +108,7 @@ class BitemporalDeltaPlanner extends BitemporalPlanner { super(datasets, ingestMode, plannerOptions, capabilities); - if (ingestMode().validityMilestoning().validityDerivation() instanceof SourceSpecifiesFromDateTime && ingestMode().deduplicationStrategy() instanceof FilterDuplicates) + if (ingestMode().validityMilestoning().validityDerivation() instanceof SourceSpecifiesFromDateTime && ingestMode().filterExistingRecords()) { this.stagingDataset = getStagingDatasetWithoutDuplicates(datasets); this.stagingDatasetWithoutDuplicates = Optional.of(this.stagingDataset); @@ -214,7 +213,7 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) } else { - if (ingestMode().deduplicationStrategy() instanceof FilterDuplicates) + if (ingestMode().filterExistingRecords()) { // Op 0: Insert records from stage table to stage 
without duplicates table operations.add(getStageToStageWithoutDuplicates()); @@ -242,7 +241,7 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) { operations.add(Delete.builder().dataset(tempDatasetWithDeleteIndicator).build()); } - if (ingestMode().deduplicationStrategy() instanceof FilterDuplicates) + if (ingestMode().filterExistingRecords()) { operations.add(Delete.builder().dataset(stagingDataset).build()); } @@ -269,7 +268,7 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { operations.add(Create.of(true, tempDatasetWithDeleteIndicator)); } - if (ingestMode().deduplicationStrategy() instanceof FilterDuplicates) + if (ingestMode().filterExistingRecords()) { operations.add(Create.of(true, stagingDataset)); } @@ -348,7 +347,7 @@ public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) { operations.add(Drop.of(true, tempDatasetWithDeleteIndicator, true)); } - if (ingestMode().deduplicationStrategy() instanceof FilterDuplicates) + if (ingestMode().filterExistingRecords()) { operations.add(Drop.of(true, stagingDatasetWithoutDuplicates.orElseThrow(IllegalStateException::new), true)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java index 1d52c6bdeea..f032983ba00 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java @@ -87,23 +87,6 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) return LogicalPlan.of(operations); } - @Override - public LogicalPlan buildLogicalPlanForPreActions(Resources resources) - { - List operations = new ArrayList<>(); - operations.add(Create.of(true, mainDataset())); - if (options().createStagingDataset()) - { - operations.add(Create.of(true, stagingDataset())); - } - operations.add(Create.of(true, metadataDataset().orElseThrow(IllegalStateException::new).get())); - if (options().enableConcurrentSafety()) - { - operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); - } - return LogicalPlan.of(operations); - } - /* insert into main_table ( diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 8a49dd3a5fe..495ca6799f4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -219,6 +219,12 @@ public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) return LogicalPlan.of(operations); } + @Override + 
List getDigestOrRemainingColumns() + { + return Collections.emptyList(); + } + @Override public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java index 12042820c56..8215ecc7af0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java @@ -20,8 +20,7 @@ import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.NontemporalDelta; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicator; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningConditionVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningConditionVisitor; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitors; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; @@ -30,21 +29,18 @@ import 
org.finos.legend.engine.persistence.components.logicalplan.conditions.Not; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.operations.Merge; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.operations.Update; import org.finos.legend.engine.persistence.components.logicalplan.operations.UpdateAbstract; -import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; -import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.Pair; -import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.logicalplan.values.*; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Optional; @@ -54,7 +50,6 @@ class NontemporalDeltaPlanner extends Planner { - private final Dataset enrichedStagingDataset; private final Condition pkMatchCondition; private final Condition digestMatchCondition; private final Condition versioningCondition; @@ -67,6 +62,7 @@ class NontemporalDeltaPlanner extends Planner private final BatchStartTimestamp batchStartTimestamp; private final Optional dataSplitInRangeCondition; + private List dataFields; 
NontemporalDeltaPlanner(Datasets datasets, NontemporalDelta ingestMode, PlannerOptions plannerOptions, Set capabilities) { @@ -75,7 +71,6 @@ class NontemporalDeltaPlanner extends Planner // validate validatePrimaryKeysNotEmpty(primaryKeys); - // TODO validate interBatchDedup Strategies this.pkMatchCondition = LogicalPlanUtils.getPrimaryKeyMatchCondition(mainDataset(), stagingDataset(), primaryKeys.toArray(new String[0])); this.digestMatchCondition = LogicalPlanUtils.getDigestMatchCondition(mainDataset(), stagingDataset(), ingestMode().digestField()); this.versioningCondition = ingestMode().versioningStrategy() @@ -86,14 +81,9 @@ class NontemporalDeltaPlanner extends Planner this.deleteIndicatorIsNotSetCondition = deleteIndicatorField.map(field -> LogicalPlanUtils.getDeleteIndicatorIsNotSetCondition(stagingDataset(), field, deleteIndicatorValues)); this.deleteIndicatorIsSetCondition = deleteIndicatorField.map(field -> LogicalPlanUtils.getDeleteIndicatorIsSetCondition(stagingDataset(), field, deleteIndicatorValues)); - this.batchStartTimestamp = BatchStartTimestamp.INSTANCE; - this.dataSplitInRangeCondition = ingestMode.dataSplitField().map(field -> LogicalPlanUtils.getDataSplitInRangeCondition(stagingDataset(), field)); - - // Perform Deduplication & Filtering of Staging Dataset - this.enrichedStagingDataset = ingestMode().versioningStrategy() - .accept(new DatasetDeduplicator(stagingDataset(), primaryKeys)); + this.dataFields = getDataFields(); } @Override @@ -131,27 +121,21 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) } /* - DELETE FROM main_table WHERE EXIST (SELECT * FROM staging_table WHERE pk_match AND digest_match AND staging.delete_indicator_is_match) + DELETE FROM main_table + WHERE EXIST (SELECT * FROM staging_table WHERE pk_match AND digest_match AND staging.delete_indicator_is_match) */ private Delete getDeleteOperation() { - List stagingFields = stagingDataset().schemaReference().fieldValues() - .stream() - .filter(field -> 
this.deleteIndicatorField.isPresent() ? !field.fieldName().equals(this.deleteIndicatorField.get()) : !field.fieldName().isEmpty()) - .collect(Collectors.toList()); - - Delete delete = Delete.builder() + return Delete.builder() .dataset(mainDataset()) .condition(Exists.builder() .source(Selection.builder() - .source(this.enrichedStagingDataset) - .addAllFields(stagingFields) + .source(stagingDataset()) + .addFields(All.INSTANCE) .condition(And.builder().addConditions(this.pkMatchCondition, this.digestMatchCondition, this.deleteIndicatorIsSetCondition.get()).build()) .build()) .build()) .build(); - - return delete; } /* @@ -166,22 +150,12 @@ WHEN MATCHED AND ((DIGEST does not match) or (delete indicator NOT match)) THEN */ private Merge getMergeOperation() { - List> keyValuePairs = stagingDataset().schemaReference().fieldValues() - .stream() - .filter(field -> this.deleteIndicatorField.isPresent() ? !field.fieldName().equals(this.deleteIndicatorField.get()) : !field.fieldName().isEmpty()) - .map(field -> Pair.of( - FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(field.fieldName()).build(), - FieldValue.builder().datasetRef(stagingDataset().datasetReference()).fieldName(field.fieldName()).build())) - .collect(Collectors.toList()); - - Dataset stagingDataset = this.enrichedStagingDataset; + List> keyValuePairs = getKeyValuePairs(); + Dataset stagingDataset = stagingDataset(); if (ingestMode().dataSplitField().isPresent()) { - keyValuePairs.removeIf(field -> field.key().fieldName().equals(ingestMode().dataSplitField().get())); - List fieldsToSelect = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); - LogicalPlanUtils.removeField(fieldsToSelect, ingestMode().dataSplitField().get()); - stagingDataset = Selection.builder().source(stagingDataset).condition(this.dataSplitInRangeCondition).addAllFields(fieldsToSelect).alias(stagingDataset().datasetReference().alias()).build(); + stagingDataset = 
Selection.builder().source(stagingDataset).condition(this.dataSplitInRangeCondition).addAllFields(dataFields).alias(stagingDataset().datasetReference().alias()).build(); } Condition versioningCondition; @@ -224,15 +198,8 @@ private Merge getMergeOperation() private Update getUpdateOperation() { Condition joinCondition = And.builder().addConditions(this.pkMatchCondition, this.versioningCondition).build(); - Dataset stagingDataset = this.enrichedStagingDataset; - - List> keyValuePairs = stagingDataset().schemaReference().fieldValues() - .stream() - .filter(field -> this.deleteIndicatorField.isPresent() ? !field.fieldName().equals(this.deleteIndicatorField.get()) : !field.fieldName().isEmpty()) - .map(field -> Pair.of( - FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(field.fieldName()).build(), - FieldValue.builder().datasetRef(stagingDataset().datasetReference()).fieldName(field.fieldName()).build())) - .collect(Collectors.toList()); + Dataset stagingDataset = stagingDataset(); + List> keyValuePairs = getKeyValuePairs(); if (ingestMode().auditing().accept(AUDIT_ENABLED)) { @@ -242,7 +209,6 @@ private Update getUpdateOperation() if (ingestMode().dataSplitField().isPresent()) { - keyValuePairs.removeIf(field -> field.key().fieldName().equals(ingestMode().dataSplitField().get())); stagingDataset = Selection.builder().source(stagingDataset).condition(this.dataSplitInRangeCondition).addAllFields(LogicalPlanUtils.ALL_COLUMNS()).alias(stagingDataset().datasetReference().alias()).build(); } Update update = UpdateAbstract.of(mainDataset(), stagingDataset, keyValuePairs, joinCondition); @@ -250,6 +216,22 @@ private Update getUpdateOperation() return update; } + private List> getKeyValuePairs() + { + List fieldsToSelect = new ArrayList<>(dataFields); + if (deleteIndicatorField.isPresent()) + { + LogicalPlanUtils.removeField(fieldsToSelect, deleteIndicatorField.get()); + } + List> keyValuePairs = fieldsToSelect + .stream() + .map(field -> Pair.of( + 
FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(((FieldValue) field).fieldName()).build(), + FieldValue.builder().datasetRef(stagingDataset().datasetReference()).fieldName(((FieldValue) field).fieldName()).build())) + .collect(Collectors.toList()); + return keyValuePairs; + } + /* insert into main_table (staging_columns) (select staging_columns from stage_table @@ -258,15 +240,13 @@ insert into main_table (staging_columns) */ private Insert getInsertOperation() { - List fieldsToInsert = stagingDataset().schemaReference().fieldValues() - .stream() - .filter(field -> this.deleteIndicatorField.isPresent() ? !field.fieldName().equals(this.deleteIndicatorField.get()) : !field.fieldName().isEmpty()) - .collect(Collectors.toList()); - - List fieldsToSelect = stagingDataset().schemaReference().fieldValues() - .stream() - .filter(field -> this.deleteIndicatorField.isPresent() ? !field.fieldName().equals(this.deleteIndicatorField.get()) : !field.fieldName().isEmpty()) - .collect(Collectors.toList()); + List fieldsToSelect = new ArrayList<>(dataFields); + List fieldsToInsert = new ArrayList<>(dataFields); + if (deleteIndicatorField.isPresent()) + { + LogicalPlanUtils.removeField(fieldsToSelect, deleteIndicatorField.get()); + LogicalPlanUtils.removeField(fieldsToInsert, deleteIndicatorField.get()); + } Condition notExistInSinkCondition = Not.of(Exists.of( Selection.builder() @@ -279,8 +259,6 @@ private Insert getInsertOperation() Condition selectCondition = notExistInSinkCondition; if (ingestMode().dataSplitField().isPresent()) { - LogicalPlanUtils.removeField(fieldsToSelect, ingestMode().dataSplitField().get()); - LogicalPlanUtils.removeField(fieldsToInsert, ingestMode().dataSplitField().get()); selectCondition = And.builder().addConditions(this.dataSplitInRangeCondition.get(), notExistInSinkCondition).build(); } @@ -290,28 +268,9 @@ private Insert getInsertOperation() 
fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(auditField).build()); fieldsToSelect.add(this.batchStartTimestamp); } - else if (!ingestMode().dataSplitField().isPresent() && !this.deleteIndicatorField.isPresent()) - { - fieldsToSelect = LogicalPlanUtils.ALL_COLUMNS(); - } - Dataset selectStage = Selection.builder().source(this.enrichedStagingDataset).condition(selectCondition).addAllFields(fieldsToSelect).build(); - return Insert.of(mainDataset(), selectStage, fieldsToInsert); - } - @Override - public LogicalPlan buildLogicalPlanForPreActions(Resources resources) - { - List operations = new ArrayList<>(); - operations.add(Create.of(true, mainDataset())); - if (options().createStagingDataset()) - { - operations.add(Create.of(true, stagingDataset())); - } - if (options().enableConcurrentSafety()) - { - operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); - } - return LogicalPlan.of(operations); + Dataset selectStage = Selection.builder().source(stagingDataset()).condition(selectCondition).addAllFields(fieldsToSelect).build(); + return Insert.of(mainDataset(), selectStage, fieldsToInsert); } public Optional getDataSplitInRangeConditionForStatistics() @@ -345,6 +304,12 @@ public Map buildLogicalPlanForPreRunStatistics(Resou return preRunStatisticsResult; } + @Override + List getDigestOrRemainingColumns() + { + return Arrays.asList(ingestMode().digestField()); + } + @Override protected void addPostRunStatsForRowsDeleted(Map postRunStatisticsResult) { @@ -359,11 +324,6 @@ protected void addPreRunStatsForRowsDeleted(Map preR { if (this.deleteIndicatorField.isPresent() && this.deleteIndicatorIsSetCondition.isPresent()) { - List stagingFields = stagingDataset().schemaReference().fieldValues() - .stream() - .filter(field -> !field.fieldName().equals(this.deleteIndicatorField.get())) - .collect(Collectors.toList()); - // Rows Deleted = rows removed (hard-deleted) from sink table 
LogicalPlan rowsDeletedCountPlan = LogicalPlan.builder().addOps(LogicalPlanUtils .getRecordCount(mainDataset(), @@ -371,7 +331,7 @@ protected void addPreRunStatsForRowsDeleted(Map preR Optional.of(Exists.builder() .source(Selection.builder() .source(stagingDataset()) - .addAllFields(stagingFields) + .addFields(All.INSTANCE) .condition(And.builder().addConditions(this.pkMatchCondition, this.digestMatchCondition, this.deleteIndicatorIsSetCondition.get()).build()) .build()) .build()))).build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java index d5adad5f1d4..e8a5f536099 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java @@ -20,15 +20,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.NontemporalSnapshot; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; -import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; -import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; -import org.finos.legend.engine.persistence.components.logicalplan.conditions.LessThan; -import 
org.finos.legend.engine.persistence.components.logicalplan.conditions.Not; -import org.finos.legend.engine.persistence.components.logicalplan.conditions.Exists; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; @@ -42,12 +34,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_DELETED; -import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.ALL_COLUMNS; -import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.getPrimaryKeyMatchCondition; class NontemporalSnapshotPlanner extends Planner { @@ -65,33 +54,10 @@ protected NontemporalSnapshot ingestMode() @Override public LogicalPlan buildLogicalPlanForIngest(Resources resources) { - Dataset stagingDataset = stagingDataset(); - List fieldsToSelect = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); - List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); - Optional selectCondition = Optional.empty(); + List dataFields = getDataFields(); + List fieldsToSelect = new ArrayList<>(dataFields); + List fieldsToInsert = new ArrayList<>(dataFields); - // If data splits is enabled, add the condition to pick only the latest data split - if (ingestMode().dataSplitField().isPresent()) - { - String dataSplitField = 
ingestMode().dataSplitField().get(); - LogicalPlanUtils.removeField(fieldsToSelect, dataSplitField); - LogicalPlanUtils.removeField(fieldsToInsert, dataSplitField); - DatasetReference stagingRight = stagingDataset.datasetReference().withAlias("stage_right"); - FieldValue dataSplitLeft = FieldValue.builder() - .fieldName(dataSplitField) - .datasetRef(stagingDataset.datasetReference()) - .build(); - FieldValue dataSplitRight = dataSplitLeft.withDatasetRef(stagingRight.datasetReference()); - selectCondition = Optional.of(Not.of(Exists.of(Selection.builder() - .source(stagingRight) - .condition(And.builder() - .addConditions( - LessThan.of(dataSplitLeft, dataSplitRight), - getPrimaryKeyMatchCondition(stagingDataset, stagingRight, primaryKeys.toArray(new String[0]))) - .build()) - .addAllFields(ALL_COLUMNS()) - .build()))); - } // If audit is enabled, add audit column to select and insert fields if (ingestMode().auditing().accept(AUDIT_ENABLED)) { @@ -99,16 +65,8 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) String auditField = ingestMode().auditing().accept(AuditingVisitors.EXTRACT_AUDIT_FIELD).orElseThrow(IllegalStateException::new); fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(auditField).build()); } - else if (!ingestMode().dataSplitField().isPresent()) - { - fieldsToSelect = LogicalPlanUtils.ALL_COLUMNS(); - } - Selection selectStaging = Selection.builder() - .source(stagingDataset) - .addAllFields(fieldsToSelect) - .condition(selectCondition) - .build(); + Selection selectStaging = Selection.builder().source(stagingDataset()).addAllFields(fieldsToSelect).build(); List operations = new ArrayList<>(); // Step 1: Delete all rows from existing table @@ -119,22 +77,6 @@ else if (!ingestMode().dataSplitField().isPresent()) return LogicalPlan.of(operations); } - @Override - public LogicalPlan buildLogicalPlanForPreActions(Resources resources) - { - List operations = new ArrayList<>(); - 
operations.add(Create.of(true, mainDataset())); - if (options().createStagingDataset()) - { - operations.add(Create.of(true, stagingDataset())); - } - if (options().enableConcurrentSafety()) - { - operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); - } - return LogicalPlan.of(operations); - } - @Override public Map buildLogicalPlanForPreRunStatistics(Resources resources) { @@ -152,6 +94,17 @@ protected void addPostRunStatsForRowsDeleted(Map pos { } + @Override + List getDigestOrRemainingColumns() + { + List remainingCols = new ArrayList<>(); + if (!primaryKeys.isEmpty()) + { + remainingCols = getNonPKNonVersionDataFields(); + } + return remainingCols; + } + @Override public boolean dataSplitExecutionSupported() { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 6123b86eec8..98b1ecf7a11 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -14,23 +14,30 @@ package org.finos.legend.engine.persistence.components.planner; +import java.util.function.Consumer; import java.util.stream.Collectors; + import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.Resources; +import 
org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitor; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditingAbstract; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditingAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationVisitors; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; -import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestampAbstract; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.logicalplan.operations.*; +import org.finos.legend.engine.persistence.components.logicalplan.values.*; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; +import org.finos.legend.engine.persistence.components.logicalplan.values.ObjectValue; 
import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; @@ -52,6 +59,8 @@ import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_TERMINATED; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_UPDATED; +import static org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicationHandler.COUNT; +import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.SUPPORTED_DATA_TYPES_FOR_VERSIONING_COLUMNS; import static org.immutables.value.Value.Default; import static org.immutables.value.Value.Immutable; import static org.immutables.value.Value.Style; @@ -106,14 +115,48 @@ default boolean enableConcurrentSafety() private final PlannerOptions plannerOptions; protected final Set capabilities; protected final List primaryKeys; + private final Optional tempStagingDataset; + private final Optional tempStagingDatasetWithoutPks; + private final Dataset effectiveStagingDataset; + protected final boolean isTempTableNeededForStaging; Planner(Datasets datasets, IngestMode ingestMode, PlannerOptions plannerOptions, Set capabilities) { this.datasets = datasets; this.ingestMode = ingestMode; this.plannerOptions = plannerOptions == null ? PlannerOptions.builder().build() : plannerOptions; + this.isTempTableNeededForStaging = LogicalPlanUtils.isTempTableNeededForStaging(ingestMode); + this.tempStagingDataset = getTempStagingDataset(); + this.tempStagingDatasetWithoutPks = getTempStagingDatasetWithoutPks(); + this.effectiveStagingDataset = isTempTableNeededForStaging ? tempStagingDataset() : originalStagingDataset(); this.capabilities = capabilities; this.primaryKeys = findCommonPrimaryKeysBetweenMainAndStaging(); + + // Validation + // 1. 
MaxVersion & AllVersion strategies must have primary keys + ingestMode.versioningStrategy().accept(new ValidatePrimaryKeysForVersioningStrategy(primaryKeys, this::validatePrimaryKeysNotEmpty)); + // 2. Validate if the versioningField is comparable if a versioningStrategy is present + validateVersioningField(ingestMode().versioningStrategy(), stagingDataset()); + } + + private Optional getTempStagingDataset() + { + Optional tempStagingDataset = Optional.empty(); + if (isTempTableNeededForStaging) + { + tempStagingDataset = Optional.of(LogicalPlanUtils.getTempStagingDatasetDefinition(originalStagingDataset(), ingestMode)); + } + return tempStagingDataset; + } + + private Optional getTempStagingDatasetWithoutPks() + { + Optional tempStagingDatasetWithoutPks = Optional.empty(); + if (isTempTableNeededForStaging) + { + tempStagingDatasetWithoutPks = Optional.of(LogicalPlanUtils.getTempStagingDatasetWithoutPks(tempStagingDataset())); + } + return tempStagingDatasetWithoutPks; } private List findCommonPrimaryKeysBetweenMainAndStaging() @@ -127,11 +170,42 @@ protected Dataset mainDataset() return datasets.mainDataset(); } - protected Dataset stagingDataset() + public Dataset stagingDataset() + { + return effectiveStagingDataset; + } + + protected Dataset originalStagingDataset() { return datasets.stagingDataset(); } + protected Dataset tempStagingDataset() + { + return tempStagingDataset.orElseThrow(IllegalStateException::new); + } + + protected Dataset tempStagingDatasetWithoutPks() + { + return tempStagingDatasetWithoutPks.orElseThrow(IllegalStateException::new); + } + + protected List getDataFields() + { + List dataFields = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); + Optional dedupField = ingestMode.deduplicationStrategy().accept(DeduplicationVisitors.EXTRACT_DEDUP_FIELD); + + if (ingestMode().dataSplitField().isPresent()) + { + LogicalPlanUtils.removeField(dataFields, ingestMode().dataSplitField().get()); + } + if (dedupField.isPresent()) + { + 
LogicalPlanUtils.removeField(dataFields, dedupField.get()); + } + return dataFields; + } + protected Optional metadataDataset() { return datasets.metadataDataset(); @@ -184,26 +258,62 @@ public LogicalPlan buildLogicalPlanForAcquireLock(Resources resources) return null; } - public abstract LogicalPlan buildLogicalPlanForPreActions(Resources resources); + public LogicalPlan buildLogicalPlanForPreActions(Resources resources) + { + List operations = new ArrayList<>(); + operations.add(Create.of(true, mainDataset())); + if (options().createStagingDataset()) + { + operations.add(Create.of(true, originalStagingDataset())); + } + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } + if (isTempTableNeededForStaging) + { + operations.add(Create.of(true, tempStagingDatasetWithoutPks())); + } + return LogicalPlan.of(operations); + } - public LogicalPlan buildLogicalPlanForPostActions(Resources resources) + public LogicalPlan buildLogicalPlanForDeduplicationAndVersioning(Resources resources) { List operations = new ArrayList<>(); - // Drop table or clean table based on flags - if (resources.externalDatasetImported()) + if (isTempTableNeededForStaging) { - operations.add(Drop.of(true, stagingDataset(), true)); + operations.add(Delete.builder().dataset(tempStagingDataset()).build()); + Dataset dedupAndVersionedDataset = LogicalPlanUtils.getDedupedAndVersionedDataset(ingestMode.deduplicationStrategy(), ingestMode.versioningStrategy(), originalStagingDataset(), primaryKeys); + List fieldsToInsert = new ArrayList<>(dedupAndVersionedDataset.schemaReference().fieldValues()); + operations.add(Insert.of(tempStagingDataset(), dedupAndVersionedDataset, fieldsToInsert)); } - else if (plannerOptions.cleanupStagingData()) + return LogicalPlan.of(operations); + } + + public LogicalPlan buildLogicalPlanForPostActions(Resources resources) + { + List operations = new ArrayList<>(); + if 
(plannerOptions.cleanupStagingData()) { - operations.add(Delete.builder().dataset(stagingDataset()).build()); + operations.add(Delete.builder().dataset(originalStagingDataset()).build()); } return LogicalPlan.of(operations); } + // Introduce a flag public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) { - return null; + List operations = new ArrayList<>(); + // Drop table + if (resources.externalDatasetImported()) + { + operations.add(Drop.of(true, originalStagingDataset(), true)); + } + if (isTempTableNeededForStaging) + { + operations.add(Drop.of(true, tempStagingDataset(), true)); + } + return LogicalPlan.of(operations); } public Map buildLogicalPlanForPreRunStatistics(Resources resources) @@ -231,6 +341,47 @@ public Map buildLogicalPlanForPostRunStatistics(Reso return postRunStatisticsResult; } + public Map buildLogicalPlanForDeduplicationAndVersioningErrorChecks(Resources resources) + { + Map dedupAndVersioningErrorChecks = new HashMap<>(); + addMaxDuplicatesErrorCheck(dedupAndVersioningErrorChecks); + addDataErrorCheck(dedupAndVersioningErrorChecks); + return dedupAndVersioningErrorChecks; + } + + protected void addMaxDuplicatesErrorCheck(Map dedupAndVersioningErrorChecks) + { + if (ingestMode.deduplicationStrategy() instanceof FailOnDuplicates) + { + FunctionImpl maxCount = FunctionImpl.builder() + .functionName(FunctionName.MAX) + .addValue(FieldValue.builder().datasetRef(tempStagingDataset().datasetReference()).fieldName(COUNT).build()) + .alias(DedupAndVersionErrorStatistics.MAX_DUPLICATES.name()) + .build(); + Selection selectMaxDupsCount = Selection.builder() + .source(tempStagingDataset()) + .addFields(maxCount) + .build(); + LogicalPlan maxDuplicatesCountPlan = LogicalPlan.builder().addOps(selectMaxDupsCount).build(); + dedupAndVersioningErrorChecks.put(DedupAndVersionErrorStatistics.MAX_DUPLICATES, maxDuplicatesCountPlan); + } + } + + protected void addDataErrorCheck(Map dedupAndVersioningErrorChecks) + { + List remainingColumns = 
getDigestOrRemainingColumns(); + if (ingestMode.versioningStrategy().accept(VersioningVisitors.IS_TEMP_TABLE_NEEDED)) + { + LogicalPlan logicalPlan = ingestMode.versioningStrategy().accept(new DeriveDataErrorCheckLogicalPlan(primaryKeys, remainingColumns, tempStagingDataset())); + if (logicalPlan != null) + { + dedupAndVersioningErrorChecks.put(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS, logicalPlan); + } + } + } + + abstract List getDigestOrRemainingColumns(); + protected void validatePrimaryKeysNotEmpty(List primaryKeys) { if (primaryKeys.isEmpty()) @@ -266,17 +417,30 @@ protected void addPreRunStatsForRowsDeleted(Map preR protected void addPostRunStatsForIncomingRecords(Map postRunStatisticsResult) { Optional filterCondition = Optional.empty(); - if (dataSplitExecutionSupported()) + Value countIncomingRecords = FunctionImpl.builder().functionName(FunctionName.COUNT).alias(INCOMING_RECORD_COUNT.get()).addValue(All.INSTANCE).build(); + Dataset dataset = originalStagingDataset(); + + // If data splits are present + if (ingestMode.dataSplitField().isPresent()) { - Optional dataSplitInRangeCondition = getDataSplitInRangeConditionForStatistics(); - if (dataSplitInRangeCondition.isPresent()) + dataset = stagingDataset(); + filterCondition = getDataSplitInRangeConditionForStatistics(); + Optional duplicateCountFieldName = ingestMode.deduplicationStrategy().accept(DeduplicationVisitors.EXTRACT_DEDUP_FIELD); + // If deduplication has been performed + if (duplicateCountFieldName.isPresent()) { - filterCondition = Optional.of(dataSplitInRangeCondition.get()); + FieldValue duplicateCountField = FieldValue.builder().fieldName(duplicateCountFieldName.get()).datasetRef(dataset.datasetReference()).build(); + FunctionImpl sumOfDuplicateFieldCount = FunctionImpl.builder().functionName(FunctionName.SUM).addValue(duplicateCountField).build(); + countIncomingRecords = 
FunctionImpl.builder().functionName(FunctionName.COALESCE).alias(INCOMING_RECORD_COUNT.get()).addValue(sumOfDuplicateFieldCount, ObjectValue.of(0)).build(); } } LogicalPlan incomingRecordCountPlan = LogicalPlan.builder() - .addOps(LogicalPlanUtils.getRecordCount(stagingDataset(), INCOMING_RECORD_COUNT.get(), filterCondition)) + .addOps(Selection.builder() + .source(dataset) + .addFields(countIncomingRecords) + .condition(filterCondition) + .build()) .build(); postRunStatisticsResult.put(INCOMING_RECORD_COUNT, incomingRecordCountPlan); } @@ -305,6 +469,35 @@ protected void addPostRunStatsForRowsDeleted(Map pos postRunStatisticsResult.put(ROWS_DELETED, rowsDeletedCountPlan); } + protected List getNonPKNonVersionDataFields() + { + List nonPkDataFields = stagingDataset().schemaReference().fieldValues().stream() + .map(fieldValue -> fieldValue.fieldName()) + .filter(fieldName -> !primaryKeys.contains(fieldName)) + .collect(Collectors.toList()); + Optional dedupField = ingestMode.deduplicationStrategy().accept(DeduplicationVisitors.EXTRACT_DEDUP_FIELD); + Optional versioningField = ingestMode.versioningStrategy().accept(VersioningVisitors.EXTRACT_VERSIONING_FIELD); + nonPkDataFields.removeIf(field -> ingestMode().dataSplitField().isPresent() && field.equals(ingestMode().dataSplitField().get())); + nonPkDataFields.removeIf(field -> dedupField.isPresent() && field.equals(dedupField.get())); + nonPkDataFields.removeIf(field -> versioningField.isPresent() && field.equals(versioningField.get())); + return nonPkDataFields; + } + + protected void validateVersioningField(VersioningStrategy versioningStrategy, Dataset dataset) + { + Optional versioningField = versioningStrategy.accept(VersioningVisitors.EXTRACT_VERSIONING_FIELD); + if (versioningField.isPresent()) + { + Field filterField = dataset.schema().fields().stream() + .filter(field -> field.name().equals(versioningField.get())) + .findFirst().orElseThrow(() -> new IllegalStateException(String.format("Versioning field [%s] 
not found in Staging Schema", versioningField.get()))); + if (!SUPPORTED_DATA_TYPES_FOR_VERSIONING_COLUMNS.contains(filterField.type().dataType())) + { + throw new IllegalStateException(String.format("Versioning field's data type [%s] is not supported", filterField.type().dataType())); + } + } + } + // auditing visitor protected static final AuditEnabled AUDIT_ENABLED = new AuditEnabled(); @@ -327,4 +520,44 @@ public Boolean visitDateTimeAuditing(DateTimeAuditingAbstract dateTimeAuditing) return true; } } + + static class ValidatePrimaryKeysForVersioningStrategy implements VersioningStrategyVisitor + { + final List primaryKeys; + final Consumer> validatePrimaryKeysNotEmpty; + + ValidatePrimaryKeysForVersioningStrategy(List primaryKeys, Consumer> validatePrimaryKeysNotEmpty) + { + this.primaryKeys = primaryKeys; + this.validatePrimaryKeysNotEmpty = validatePrimaryKeysNotEmpty; + } + + @Override + public Void visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) + { + return null; + } + + @Override + public Void visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) + { + validatePrimaryKeysNotEmpty.accept(primaryKeys); + if (primaryKeys.contains(maxVersionStrategy.versioningField())) + { + throw new IllegalStateException("Versioning field cannot be a primary key"); + } + return null; + } + + @Override + public Void visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract) + { + validatePrimaryKeysNotEmpty.accept(primaryKeys); + if (primaryKeys.contains(allVersionsStrategyAbstract.versioningField())) + { + throw new IllegalStateException("Versioning field cannot be a primary key"); + } + return null; + } + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java index 58cf0f92144..2431aec2ac8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java @@ -18,8 +18,7 @@ import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicator; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningConditionVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningConditionVisitor; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitors; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; @@ -30,7 +29,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.conditions.Or; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import 
org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.operations.Update; @@ -56,10 +54,8 @@ class UnitemporalDeltaPlanner extends UnitemporalPlanner { private final Optional deleteIndicatorField; private final List deleteIndicatorValues; - private final Dataset enrichedStagingDataset; private final Condition versioningCondition; private final Condition inverseVersioningCondition; - private final Optional deleteIndicatorIsNotSetCondition; private final Optional deleteIndicatorIsSetCondition; private final Optional dataSplitInRangeCondition; @@ -73,8 +69,6 @@ class UnitemporalDeltaPlanner extends UnitemporalPlanner { validateOptimizationFilters(ingestMode.optimizationFilters(), stagingDataset()); } - // Validate if the versioningField is comparable if a versioningStrategy is present - validateVersioningField(ingestMode().versioningStrategy(), stagingDataset()); this.deleteIndicatorField = ingestMode.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_FIELD); this.deleteIndicatorValues = ingestMode.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_VALUES); @@ -82,9 +76,6 @@ class UnitemporalDeltaPlanner extends UnitemporalPlanner this.deleteIndicatorIsNotSetCondition = deleteIndicatorField.map(field -> LogicalPlanUtils.getDeleteIndicatorIsNotSetCondition(stagingDataset(), field, deleteIndicatorValues)); this.deleteIndicatorIsSetCondition = deleteIndicatorField.map(field -> LogicalPlanUtils.getDeleteIndicatorIsSetCondition(stagingDataset(), field, deleteIndicatorValues)); this.dataSplitInRangeCondition = ingestMode.dataSplitField().map(field -> LogicalPlanUtils.getDataSplitInRangeCondition(stagingDataset(), field)); - // Perform Deduplication & Filtering of Staging Dataset - this.enrichedStagingDataset = ingestMode().versioningStrategy() - .accept(new DatasetDeduplicator(stagingDataset(), primaryKeys)); this.versioningCondition = 
ingestMode().versioningStrategy() .accept(new VersioningConditionVisitor(mainDataset(), stagingDataset(), false, ingestMode().digestField())); this.inverseVersioningCondition = ingestMode.versioningStrategy() @@ -113,24 +104,6 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) return LogicalPlan.of(operations); } - @Override - public LogicalPlan buildLogicalPlanForPreActions(Resources resources) - { - List operations = new ArrayList<>(); - operations.add(Create.of(true, mainDataset())); - if (options().createStagingDataset()) - { - operations.add(Create.of(true, stagingDataset())); - } - operations.add(Create.of(true, metadataDataset().orElseThrow(IllegalStateException::new).get())); - if (options().enableConcurrentSafety()) - { - operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); - } - return LogicalPlan.of(operations); - } - - /* ------------------ Upsert Logic: @@ -145,10 +118,10 @@ INSERT INTO main_table (staging_columns, special_columns) */ private Insert getUpsertLogic() { - List columnsToInsert = new ArrayList<>(); - List stagingColumns = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); + List dataFields = getDataFields(); + List columnsToInsert = new ArrayList<>(dataFields); + List stagingColumns = new ArrayList<>(dataFields); List milestoneColumns = transactionMilestoningFields(); - columnsToInsert.addAll(stagingColumns); columnsToInsert.addAll(milestoneColumns); List columnsToSelect = new ArrayList<>(stagingColumns); @@ -158,12 +131,6 @@ private Insert getUpsertLogic() LogicalPlanUtils.removeField(columnsToInsert, deleteIndicatorField); }); - if (ingestMode().dataSplitField().isPresent()) - { - LogicalPlanUtils.removeField(columnsToSelect, ingestMode().dataSplitField().get()); - LogicalPlanUtils.removeField(columnsToInsert, ingestMode().dataSplitField().get()); - } - List milestoneUpdateValues = transactionMilestoningFieldValues(); 
columnsToSelect.addAll(milestoneUpdateValues); @@ -206,7 +173,7 @@ private Insert getUpsertLogic() } } - Dataset selectStage = Selection.builder().source(enrichedStagingDataset).condition(selectCondition).addAllFields(columnsToSelect).build(); + Dataset selectStage = Selection.builder().source(stagingDataset()).condition(selectCondition).addAllFields(columnsToSelect).build(); return Insert.of(mainDataset(), selectStage, columnsToInsert); } @@ -241,7 +208,7 @@ private Update getMilestoningLogic() Condition existsCondition = Exists.of( Selection.builder() - .source(enrichedStagingDataset) + .source(stagingDataset()) .condition(selectCondition) .addAllFields(LogicalPlanUtils.ALL_COLUMNS()) .build()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java index 3b021655afc..d811ed6cfd6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java @@ -18,10 +18,6 @@ import org.finos.legend.engine.persistence.components.common.OptimizationFilter; import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategyAbstract; -import 
org.finos.legend.engine.persistence.components.ingestmode.deduplication.NoVersioningStrategyAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningStrategyVisitor; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAbstract; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTimeAbstract; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTimeAbstract; @@ -35,6 +31,8 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.values.All; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; @@ -57,14 +55,12 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_UPDATED; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_TERMINATED; import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.SUPPORTED_DATA_TYPES_FOR_OPTIMIZATION_COLUMNS; -import static 
org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.SUPPORTED_DATA_TYPES_FOR_VERSIONING_COLUMNS; abstract class UnitemporalPlanner extends Planner { @@ -75,7 +71,7 @@ abstract class UnitemporalPlanner extends Planner protected final Condition openRecordCondition; protected final Condition digestMatchCondition; protected final Condition digestDoesNotMatchCondition; - + protected final String digestField; protected Condition primaryKeysMatchCondition; UnitemporalPlanner(Datasets datasets, TransactionMilestoned transactionMilestoned, PlannerOptions plannerOptions, Set capabilities) @@ -96,6 +92,7 @@ abstract class UnitemporalPlanner extends Planner this.mainTableName = StringValue.of(mainDataset().datasetReference().name().orElseThrow(IllegalStateException::new)); this.batchStartTimestamp = BatchStartTimestamp.INSTANCE; this.batchEndTimestamp = BatchEndTimestamp.INSTANCE; + this.digestField = transactionMilestoned.digestField(); this.openRecordCondition = transactionMilestoned.transactionMilestoning().accept(new DetermineOpenRecordCondition(mainDataset())); this.digestMatchCondition = LogicalPlanUtils.getDigestMatchCondition(mainDataset(), stagingDataset(), transactionMilestoned.digestField()); this.primaryKeysMatchCondition = LogicalPlanUtils.getPrimaryKeyMatchCondition(mainDataset(), stagingDataset(), primaryKeys.toArray(new String[0])); @@ -111,10 +108,37 @@ protected TransactionMilestoned ingestMode() @Override public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) { - List stagingFilters = LogicalPlanUtils.getDatasetFilters(stagingDataset()); + List stagingFilters = LogicalPlanUtils.getDatasetFilters(originalStagingDataset()); return LogicalPlan.of(Arrays.asList(metadataUtils.insertMetaData(mainTableName, batchStartTimestamp, batchEndTimestamp, stagingFilters))); } + @Override + List getDigestOrRemainingColumns() + { + return Arrays.asList(digestField); + } + + @Override + public LogicalPlan 
buildLogicalPlanForPreActions(Resources resources) + { + List operations = new ArrayList<>(); + operations.add(Create.of(true, mainDataset())); + if (options().createStagingDataset()) + { + operations.add(Create.of(true, originalStagingDataset())); + } + operations.add(Create.of(true, metadataDataset().orElseThrow(IllegalStateException::new).get())); + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } + if (isTempTableNeededForStaging) + { + operations.add(Create.of(true, tempStagingDatasetWithoutPks())); + } + return LogicalPlan.of(operations); + } + protected void validatePrimaryKey(List fields, String targetFieldName) { Field targetField = fields.stream().filter(field -> field.name().equals(targetFieldName)).findFirst().orElse(null); @@ -154,35 +178,6 @@ protected void validateOptimizationFilters(List optimization } } - protected void validateVersioningField(VersioningStrategy versioningStrategy, Dataset dataset) - { - Optional versioningField = versioningStrategy.accept(new VersioningStrategyVisitor>() - { - @Override - public Optional visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) - { - return Optional.empty(); - } - - @Override - public Optional visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) - { - return Optional.of(maxVersionStrategy.versioningField()); - } - }); - - if (versioningField.isPresent()) - { - Field filterField = dataset.schema().fields().stream() - .filter(field -> field.name().equals(versioningField.get())) - .findFirst().orElseThrow(() -> new IllegalStateException(String.format("Versioning field [%s] not found in Staging Schema", versioningField.get()))); - if (!SUPPORTED_DATA_TYPES_FOR_VERSIONING_COLUMNS.contains(filterField.type().dataType())) - { - throw new IllegalStateException(String.format("Versioning field's data type [%s] is not supported", filterField.type().dataType())); - } - } - } 
- protected List> keyValuesForMilestoningUpdate() { return ingestMode().transactionMilestoning().accept(new DetermineMilestoningUpdateKeyValues(mainDataset(), metadataUtils, batchStartTimestamp)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index da241423103..e0cbd780cd0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -92,23 +92,6 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) } } - @Override - public LogicalPlan buildLogicalPlanForPreActions(Resources resources) - { - List operations = new ArrayList<>(); - operations.add(Create.of(true, mainDataset())); - if (options().createStagingDataset()) - { - operations.add(Create.of(true, stagingDataset())); - } - operations.add(Create.of(true, metadataDataset().orElseThrow(IllegalStateException::new).get())); - if (options().enableConcurrentSafety()) - { - operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); - } - return LogicalPlan.of(operations); - } - /* insert into main_table ( @@ -156,12 +139,13 @@ protected Insert sqlToUpsertRows() 
.addFields(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().digestField()).build()) .build())); - List fieldsToSelect = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); + List dataFields = getDataFields(); + List fieldsToSelect = new ArrayList<>(dataFields); List milestoneUpdateValues = transactionMilestoningFieldValues(); fieldsToSelect.addAll(milestoneUpdateValues); Dataset selectStage = Selection.builder().source(stagingDataset()).condition(notInSinkCondition).addAllFields(fieldsToSelect).build(); - List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); + List fieldsToInsert = new ArrayList<>(dataFields); fieldsToInsert.addAll(transactionMilestoningFields()); return Insert.of(mainDataset(), selectStage, fieldsToInsert); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index 7ce63a97f29..bcc67933972 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -19,6 +19,9 @@ import org.finos.legend.engine.persistence.components.common.DatasetFilter; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.OptimizationFilter; +import 
org.finos.legend.engine.persistence.components.ingestmode.IngestMode; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.*; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; @@ -36,6 +39,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.logicalplan.values.All; import org.finos.legend.engine.persistence.components.logicalplan.values.Array; import org.finos.legend.engine.persistence.components.logicalplan.values.DatetimeValue; @@ -80,6 +84,7 @@ public class LogicalPlanUtils public static final String DATA_SPLIT_UPPER_BOUND_PLACEHOLDER = "{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}"; public static final String UNDERSCORE = "_"; public static final String TEMP_DATASET_BASE_NAME = "legend_persistence_temp"; + public static final String TEMP_STAGING_DATASET_BASE_NAME = "legend_persistence_temp_staging"; public static final String TEMP_DATASET_WITH_DELETE_INDICATOR_BASE_NAME = "legend_persistence_tempWithDeleteIndicator"; private LogicalPlanUtils() @@ -438,6 +443,47 @@ public static Dataset getTempDatasetWithDeleteIndicator(Datasets datasets, Strin } } + public static Dataset getTempStagingDatasetDefinition(Dataset stagingDataset, IngestMode ingestMode) + { + String alias = stagingDataset.datasetReference().alias().orElse(TEMP_STAGING_DATASET_BASE_NAME); + String datasetName = 
stagingDataset.datasetReference().name().orElseThrow(IllegalStateException::new) + UNDERSCORE + TEMP_STAGING_DATASET_BASE_NAME; + SchemaDefinition tempStagingSchema = ingestMode.versioningStrategy().accept(new DeriveTempStagingSchemaDefinition(stagingDataset.schema(), ingestMode.deduplicationStrategy())); + return DatasetDefinition.builder() + .schema(tempStagingSchema) + .database(stagingDataset.datasetReference().database()) + .group(stagingDataset.datasetReference().group()) + .name(datasetName) + .alias(alias) + .build(); + } + + public static Dataset getTempStagingDatasetWithoutPks(Dataset tempStagingDataset) + { + List fieldsWithoutPk = tempStagingDataset.schema().fields().stream() + .map(field -> field.withPrimaryKey(false)).collect(Collectors.toList()); + return tempStagingDataset.withSchema(tempStagingDataset.schema().withFields(fieldsWithoutPk)); + } + + public static Dataset getDedupedAndVersionedDataset(DeduplicationStrategy deduplicationStrategy, VersioningStrategy versioningStrategy, Dataset stagingDataset, List primaryKeys) + { + Dataset dedupedDataset = deduplicationStrategy.accept(new DatasetDeduplicationHandler(stagingDataset)); + boolean isTempTableNeededForVersioning = versioningStrategy.accept(VersioningVisitors.IS_TEMP_TABLE_NEEDED); + if (isTempTableNeededForVersioning && dedupedDataset instanceof Selection) + { + Selection selection = (Selection) dedupedDataset; + dedupedDataset = selection.withAlias(stagingDataset.datasetReference().alias()); + } + Dataset versionedDataset = versioningStrategy.accept(new DatasetVersioningHandler(dedupedDataset, primaryKeys)); + return versionedDataset; + } + + public static boolean isTempTableNeededForStaging(IngestMode ingestMode) + { + boolean isTempTableNeededForVersioning = ingestMode.versioningStrategy().accept(VersioningVisitors.IS_TEMP_TABLE_NEEDED); + boolean isTempTableNeededForDedup = ingestMode.deduplicationStrategy().accept(DeduplicationVisitors.IS_TEMP_TABLE_NEEDED); + return 
isTempTableNeededForVersioning || isTempTableNeededForDedup; + } + public static Set SUPPORTED_DATA_TYPES_FOR_OPTIMIZATION_COLUMNS = new HashSet<>(Arrays.asList(INT, INTEGER, BIGINT, FLOAT, DOUBLE, DECIMAL, DATE)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java index 15765b38b52..a04839f00bf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java @@ -25,6 +25,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshotAbstract; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoadAbstract; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationVisitors; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitors; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAbstract; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTimeAbstract; @@ -35,6 +36,7 @@ import 
org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromAndThruDateTimeAbstract; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromDateTimeAbstract; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.ValidityDerivationVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningVisitors; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; @@ -307,47 +309,54 @@ private SchemaDefinition evolveSchemaDefinition(SchemaDefinition schema, Set visitAppendOnly(AppendOnlyAbstract appendOnly) { - return Collections.emptySet(); + Set stagingFieldsToIgnore = getDedupAndVersioningFields(appendOnly); + return stagingFieldsToIgnore; } @Override public Set visitNontemporalSnapshot(NontemporalSnapshotAbstract nontemporalSnapshot) { - return Collections.emptySet(); + Set stagingFieldsToIgnore = getDedupAndVersioningFields(nontemporalSnapshot); + return stagingFieldsToIgnore; } @Override public Set visitNontemporalDelta(NontemporalDeltaAbstract nontemporalDelta) { - return Collections.emptySet(); + Set stagingFieldsToIgnore = getDedupAndVersioningFields(nontemporalDelta); + return stagingFieldsToIgnore; } @Override public Set visitUnitemporalSnapshot(UnitemporalSnapshotAbstract unitemporalSnapshot) { - return Collections.emptySet(); + Set stagingFieldsToIgnore = getDedupAndVersioningFields(unitemporalSnapshot); + return stagingFieldsToIgnore; } @Override public Set visitUnitemporalDelta(UnitemporalDeltaAbstract unitemporalDelta) { - return unitemporalDelta.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_FIELD) - .map(Collections::singleton) - .orElse(Collections.emptySet()); + Set 
stagingFieldsToIgnore = getDedupAndVersioningFields(unitemporalDelta); + unitemporalDelta.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_FIELD).ifPresent(stagingFieldsToIgnore::add); + return stagingFieldsToIgnore; } @Override public Set visitBitemporalSnapshot(BitemporalSnapshotAbstract bitemporalSnapshot) { - return bitemporalSnapshot.validityMilestoning().accept(VALIDITY_FIELDS_TO_IGNORE_IN_STAGING); + Set stagingFieldsToIgnore = getDedupAndVersioningFields(bitemporalSnapshot); + stagingFieldsToIgnore.addAll(bitemporalSnapshot.validityMilestoning().accept(VALIDITY_FIELDS_TO_IGNORE_IN_STAGING)); + return stagingFieldsToIgnore; } @Override public Set visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) { - Set fieldsToIgnore = bitemporalDelta.validityMilestoning().accept(VALIDITY_FIELDS_TO_IGNORE_IN_STAGING); - bitemporalDelta.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_FIELD).ifPresent(fieldsToIgnore::add); - return fieldsToIgnore; + Set stagingFieldsToIgnore = getDedupAndVersioningFields(bitemporalDelta); + stagingFieldsToIgnore.addAll(bitemporalDelta.validityMilestoning().accept(VALIDITY_FIELDS_TO_IGNORE_IN_STAGING)); + bitemporalDelta.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_FIELD).ifPresent(stagingFieldsToIgnore::add); + return stagingFieldsToIgnore; } @Override @@ -355,6 +364,14 @@ public Set visitBulkLoad(BulkLoadAbstract bulkLoad) { return Collections.emptySet(); } + + private Set getDedupAndVersioningFields(IngestMode ingestMode) + { + Set dedupAndVersioningFields = new HashSet<>(); + ingestMode.dataSplitField().ifPresent(dedupAndVersioningFields::add); + ingestMode.deduplicationStrategy().accept(DeduplicationVisitors.EXTRACT_DEDUP_FIELD).ifPresent(dedupAndVersioningFields::add); + return dedupAndVersioningFields; + } }; private static final IngestModeVisitor> MAIN_TABLE_FIELDS_TO_IGNORE = new IngestModeVisitor>() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java index 3e663deb408..a76a9babe61 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java @@ -41,7 +41,7 @@ public interface Transformer> partitionFilter = new HashMap>() {{ - put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00", "2000-01-02 00:00:00"))); + put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00.000000", "2000-01-02 00:00:00"))); }}; // Base Columns: Primary keys : id, name @@ -402,10 +402,10 @@ public class IngestModeTest "\"TABLE_BATCH_ID\" INTEGER)"; protected String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\")" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; protected String 
expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (\"TABLE_NAME\", \"TABLE_BATCH_ID\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\")" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.\"TABLE_NAME\" = 'main'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.\"TABLE_NAME\" = 'main'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; protected String expectedMetadataTableIngestQueryWithPlaceHolders = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\") (SELECT 'main',{BATCH_ID_PATTERN},'{BATCH_START_TS_PATTERN}','{BATCH_END_TS_PATTERN}','DONE')"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java index ef02b5b78d1..cfed0b48c69 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java @@ -82,24 +82,24 @@ public void verifyBitemporalDeltaBatchIdDateTimeBasedNoDeleteIndWithDataSplits(L { String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET sink.\"batch_id_out\" = " + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE " + "((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"validity_from_target\" = stage.\"validity_from_reference\") AND (sink.\"digest\" <> stage.\"digest\")))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"validity_from_target\", " + - "\"validity_through_target\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + + "\"validity_through_target\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"validity_through_reference\"," + - "stage.\"digest\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + 
"stage.\"digest\",stage.\"version\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE " + "(sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"validity_from_target\" = stage.\"validity_from_reference\")))) AND " + "((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalMainTableWithBatchIdDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalMainTableWithVersionWithBatchIdDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedMilestoneQuery, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); @@ -164,7 +164,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND " + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND " + @@ -173,10 +173,10 @@ public void 
verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + - "(\"id\", \"name\", \"amount\", \"validity_from_target\", \"validity_through_target\", \"digest\", " + + "(\"id\", \"name\", \"amount\", \"validity_from_target\", \"validity_through_target\", \"digest\", \"version\", " + "\"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\"," + - "stage.\"validity_through_reference\",stage.\"digest\",'2000-01-01 00:00:00'," + + "stage.\"validity_through_reference\",stage.\"digest\",stage.\"version\",'2000-01-01 00:00:00.000000'," + "'9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage WHERE " + "((NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + "AND (sink.\"digest\" = stage.\"digest\") AND ((sink.\"id\" = stage.\"id\") AND " + @@ -184,7 +184,7 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) AND " + "(stage.\"delete_indicator\" NOT IN ('yes','1','true')))"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalMainTableWithDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalMainTableWithVersionBatchDateTimeCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedMilestoneQuery, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); @@ -196,10 +196,10 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List= 1) AND (stage.\"data_split\" <= 1)"; - String rowsUpdated = 
"SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))"; + String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsInserted\""; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsTerminated\""; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = 
'2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsInserted\""; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsTerminated\""; verifyStats(operations.get(0), incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java index 777c8bb3a4f..aa9ac70d4c0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java @@ -123,10 +123,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(GeneratorRe public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedStageToTemp = "INSERT INTO \"mydb\".\"temp\" " + - "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + - "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + + "(\"id\", \"name\", \"amount\", \"version\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + "FROM " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"data_split\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND 
(stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"version\",stage.\"data_split\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"legend_persistence_start_date\",COALESCE(MIN(legend_persistence_y.\"legend_persistence_start_date\"),MIN(legend_persistence_x.\"legend_persistence_end_date\")) as \"legend_persistence_end_date\" " + "FROM " + @@ -144,10 +144,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"version\",stage.\"delete_indicator\",stage.\"data_split\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"delete_indicator\" NOT IN ('yes','1','true')) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"legend_persistence_start_date\",COALESCE(MIN(legend_persistence_y.\"legend_persistence_start_date\"),MIN(legend_persistence_x.\"legend_persistence_end_date\")) as \"legend_persistence_end_date\" " + "FROM " + @@ -372,10 +374,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List legend_persistence_x.\"validity_from_target\") AND (legend_persistence_y.\"delete_indicator\" = 0) " + "WHERE 
legend_persistence_x.\"delete_indicator\" = 0 " + - "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"digest\", legend_persistence_x.\"validity_from_target\", legend_persistence_x.\"batch_id_in\", legend_persistence_x.\"batch_id_out\") as legend_persistence_x " + + "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"version\", legend_persistence_x.\"digest\", legend_persistence_x.\"validity_from_target\", legend_persistence_x.\"batch_id_in\", legend_persistence_x.\"batch_id_out\") as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.\"id\" = legend_persistence_y.\"id\") AND (legend_persistence_x.\"name\" = legend_persistence_y.\"name\")) AND (legend_persistence_y.\"validity_through_target\" > legend_persistence_x.\"legend_persistence_start_date\") AND (legend_persistence_y.\"validity_through_target\" <= legend_persistence_x.\"legend_persistence_end_date\") AND (legend_persistence_y.\"delete_indicator\" <> 0) " + - "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"digest\", legend_persistence_x.\"legend_persistence_start_date\", legend_persistence_x.\"batch_id_in\", legend_persistence_x.\"batch_id_out\")"; + "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"version\", legend_persistence_x.\"digest\", legend_persistence_x.\"legend_persistence_start_date\", legend_persistence_x.\"batch_id_in\", legend_persistence_x.\"batch_id_out\")"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, 
operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery, operations.get(0).preActionsSql().get(3)); @@ -564,15 +566,15 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplitsFilterDuplic public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDuplicates(List operations, List dataSplitRanges) { String expectedStageToStageWithoutDuplicates = "INSERT INTO \"mydb\".\"stagingWithoutDuplicates\" " + - "(\"id\", \"name\", \"amount\", \"validity_from_reference\", \"digest\", \"data_split\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"data_split\" FROM \"mydb\".\"staging\" as stage " + + "(\"id\", \"name\", \"amount\", \"validity_from_reference\", \"digest\", \"version\", \"data_split\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"version\",stage.\"data_split\" FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"digest\" = stage.\"digest\") AND (sink.\"batch_id_out\" = 999999999))))"; String expectedStageToTemp = "INSERT INTO \"mydb\".\"temp\" " + - "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + - "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM 
batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + + "(\"id\", \"name\", \"amount\", \"version\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + "FROM " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"data_split\" FROM \"mydb\".\"stagingWithoutDuplicates\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"version\",stage.\"data_split\" FROM \"mydb\".\"stagingWithoutDuplicates\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"legend_persistence_start_date\",COALESCE(MIN(legend_persistence_y.\"legend_persistence_start_date\"),MIN(legend_persistence_x.\"legend_persistence_end_date\")) as \"legend_persistence_end_date\" " + "FROM " + @@ -590,10 +592,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl "ON ((legend_persistence_x.\"id\" = legend_persistence_y.\"id\") AND (legend_persistence_x.\"name\" = 
legend_persistence_y.\"name\")) AND (legend_persistence_x.\"validity_from_reference\" = legend_persistence_y.\"legend_persistence_start_date\"))"; String expectedMainToTemp = "INSERT INTO \"mydb\".\"temp\" " + - "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + - "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + + "(\"id\", \"name\", \"amount\", \"version\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + "FROM " + - "(SELECT sink.\"id\",sink.\"name\",sink.\"amount\",sink.\"digest\",sink.\"batch_id_in\",sink.\"batch_id_out\",sink.\"validity_from_target\",sink.\"validity_through_target\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999) as legend_persistence_x " + + "(SELECT sink.\"id\",sink.\"name\",sink.\"amount\",sink.\"digest\",sink.\"version\",sink.\"batch_id_in\",sink.\"batch_id_out\",sink.\"validity_from_target\",sink.\"validity_through_target\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999) as legend_persistence_x " + "INNER JOIN " + 
"(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"legend_persistence_start_date\",legend_persistence_x.\"legend_persistence_end_date\" as \"legend_persistence_end_date\" " + "FROM " + @@ -618,13 +620,13 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl "AND (sink.\"batch_id_out\" = 999999999)"; String expectedTempToMain = "INSERT INTO \"mydb\".\"main\" " + - "(\"id\", \"name\", \"amount\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"validity_from_target\", \"validity_through_target\") " + - "(SELECT temp.\"id\",temp.\"name\",temp.\"amount\",temp.\"digest\",temp.\"batch_id_in\",temp.\"batch_id_out\",temp.\"validity_from_target\",temp.\"validity_through_target\" FROM \"mydb\".\"temp\" as temp)"; + "(\"id\", \"name\", \"amount\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\", \"validity_from_target\", \"validity_through_target\") " + + "(SELECT temp.\"id\",temp.\"name\",temp.\"amount\",temp.\"digest\",temp.\"version\",temp.\"batch_id_in\",temp.\"batch_id_out\",temp.\"validity_from_target\",temp.\"validity_through_target\" FROM \"mydb\".\"temp\" as temp)"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), operations.get(0).preActionsSql().get(1)); - Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); - Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyStageWithDataSplitWithoutDuplicatesTableCreateQuery, operations.get(0).preActionsSql().get(3)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyTempTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(2)); + 
Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyStageWithVersionWithDataSplitWithoutDuplicatesTableCreateQuery, operations.get(0).preActionsSql().get(3)); Assertions.assertEquals(expectedStageToStageWithoutDuplicates, operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedStageToTemp, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(1)); @@ -795,6 +797,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + "\"digest\" VARCHAR," + + "\"version\" INTEGER," + "\"batch_id_in\" INTEGER NOT NULL," + "\"batch_id_out\" INTEGER," + "\"validity_from_target\" DATETIME NOT NULL," + @@ -806,6 +809,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "\"name\" VARCHAR NOT NULL," + "\"amount\" DOUBLE," + "\"digest\" VARCHAR," + + "\"version\" INTEGER," + "\"batch_id_in\" INTEGER NOT NULL," + "\"batch_id_out\" INTEGER," + "\"validity_from_target\" DATETIME NOT NULL," + @@ -819,20 +823,21 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "\"amount\" DOUBLE," + "\"validity_from_reference\" DATETIME NOT NULL," + "\"digest\" VARCHAR," + + "\"version\" INTEGER," + "\"delete_indicator\" VARCHAR," + "\"data_split\" BIGINT NOT NULL," + "PRIMARY KEY (\"id\", \"name\", \"validity_from_reference\", \"data_split\"))"; String expectedStageToStageWithoutDuplicates = "INSERT INTO " + stageWithoutDuplicatesName + " " + - "(\"id\", \"name\", \"amount\", \"validity_from_reference\", \"digest\", \"delete_indicator\", \"data_split\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"delete_indicator\",stage.\"data_split\" FROM \"mydb\".\"staging\" as stage " + + "(\"id\", \"name\", \"amount\", \"validity_from_reference\", \"digest\", \"version\", \"delete_indicator\", \"data_split\") " + + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\",stage.\"version\",stage.\"delete_indicator\",stage.\"data_split\" FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"digest\" = stage.\"digest\") AND (sink.\"batch_id_out\" = 999999999))))"; String expectedStageToTemp = "INSERT INTO " + tempName + " " + - "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + - "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + + "(\"id\", \"name\", \"amount\", \"version\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + "FROM " + - "(SELECT 
legend_persistence_stageWithoutDuplicates.\"id\",legend_persistence_stageWithoutDuplicates.\"name\",legend_persistence_stageWithoutDuplicates.\"amount\",legend_persistence_stageWithoutDuplicates.\"validity_from_reference\",legend_persistence_stageWithoutDuplicates.\"digest\",legend_persistence_stageWithoutDuplicates.\"delete_indicator\",legend_persistence_stageWithoutDuplicates.\"data_split\" FROM " + stageWithoutDuplicatesName + " as legend_persistence_stageWithoutDuplicates WHERE (legend_persistence_stageWithoutDuplicates.\"delete_indicator\" NOT IN ('yes','1','true')) AND ((legend_persistence_stageWithoutDuplicates.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (legend_persistence_stageWithoutDuplicates.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + + "(SELECT legend_persistence_stageWithoutDuplicates.\"id\",legend_persistence_stageWithoutDuplicates.\"name\",legend_persistence_stageWithoutDuplicates.\"amount\",legend_persistence_stageWithoutDuplicates.\"validity_from_reference\",legend_persistence_stageWithoutDuplicates.\"digest\",legend_persistence_stageWithoutDuplicates.\"version\",legend_persistence_stageWithoutDuplicates.\"delete_indicator\",legend_persistence_stageWithoutDuplicates.\"data_split\" FROM " + stageWithoutDuplicatesName + " as legend_persistence_stageWithoutDuplicates WHERE (legend_persistence_stageWithoutDuplicates.\"delete_indicator\" NOT IN ('yes','1','true')) AND ((legend_persistence_stageWithoutDuplicates.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (legend_persistence_stageWithoutDuplicates.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"legend_persistence_start_date\",COALESCE(MIN(legend_persistence_y.\"legend_persistence_start_date\"),MIN(legend_persistence_x.\"legend_persistence_end_date\")) as 
\"legend_persistence_end_date\" " + "FROM " + @@ -850,10 +855,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "ON ((legend_persistence_x.\"id\" = legend_persistence_y.\"id\") AND (legend_persistence_x.\"name\" = legend_persistence_y.\"name\")) AND (legend_persistence_x.\"validity_from_reference\" = legend_persistence_y.\"legend_persistence_start_date\"))"; String expectedMainToTemp = "INSERT INTO " + tempName + " " + - "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + - "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + + "(\"id\", \"name\", \"amount\", \"version\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + "FROM " + - "(SELECT sink.\"id\",sink.\"name\",sink.\"amount\",sink.\"digest\",sink.\"batch_id_in\",sink.\"batch_id_out\",sink.\"validity_from_target\",sink.\"validity_through_target\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999) as legend_persistence_x " + + "(SELECT 
sink.\"id\",sink.\"name\",sink.\"amount\",sink.\"digest\",sink.\"version\",sink.\"batch_id_in\",sink.\"batch_id_out\",sink.\"validity_from_target\",sink.\"validity_through_target\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999) as legend_persistence_x " + "INNER JOIN " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"legend_persistence_start_date\",legend_persistence_x.\"legend_persistence_end_date\" as \"legend_persistence_end_date\" " + "FROM " + @@ -878,12 +883,12 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "AND (sink.\"batch_id_out\" = 999999999)"; String expectedTempToMain = "INSERT INTO \"mydb\".\"main\" " + - "(\"id\", \"name\", \"amount\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"validity_from_target\", \"validity_through_target\") " + - "(SELECT legend_persistence_temp.\"id\",legend_persistence_temp.\"name\",legend_persistence_temp.\"amount\",legend_persistence_temp.\"digest\",legend_persistence_temp.\"batch_id_in\",legend_persistence_temp.\"batch_id_out\",legend_persistence_temp.\"validity_from_target\",legend_persistence_temp.\"validity_through_target\" FROM " + tempName + " as legend_persistence_temp)"; + "(\"id\", \"name\", \"amount\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\", \"validity_from_target\", \"validity_through_target\") " + + "(SELECT legend_persistence_temp.\"id\",legend_persistence_temp.\"name\",legend_persistence_temp.\"amount\",legend_persistence_temp.\"digest\",legend_persistence_temp.\"version\",legend_persistence_temp.\"batch_id_in\",legend_persistence_temp.\"batch_id_out\",legend_persistence_temp.\"validity_from_target\",legend_persistence_temp.\"validity_through_target\" FROM " + tempName + " as legend_persistence_temp)"; String expectedMainToTempForDeletion = "INSERT INTO " + tempWithDeleteIndicatorName + " " + - "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", 
\"validity_through_target\", \"batch_id_in\", \"batch_id_out\", \"delete_indicator\") " + - "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_x.\"validity_through_target\" as \"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,(CASE WHEN legend_persistence_y.\"delete_indicator\" IS NULL THEN 0 ELSE 1 END) " + + "(\"id\", \"name\", \"amount\", \"version\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\", \"delete_indicator\") " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_x.\"validity_through_target\" as \"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,(CASE WHEN legend_persistence_y.\"delete_indicator\" IS NULL THEN 0 ELSE 1 END) " + "FROM " + "(SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (EXISTS " + @@ -903,19 +908,19 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "AND (sink.\"batch_id_out\" = 999999999)"; String expectedTempToMainForDeletion = "INSERT INTO \"mydb\".\"main\" " + - "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + - "(SELECT 
legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"legend_persistence_start_date\" as \"legend_persistence_start_date\",MAX(legend_persistence_y.\"validity_through_target\") as \"legend_persistence_end_date\",legend_persistence_x.\"batch_id_in\",legend_persistence_x.\"batch_id_out\" FROM " + - "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",COALESCE(MIN(legend_persistence_y.\"validity_from_target\"),'9999-12-31 23:59:59') as \"legend_persistence_end_date\",legend_persistence_x.\"batch_id_in\",legend_persistence_x.\"batch_id_out\" " + + "(\"id\", \"name\", \"amount\", \"version\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"legend_persistence_start_date\" as \"legend_persistence_start_date\",MAX(legend_persistence_y.\"validity_through_target\") as \"legend_persistence_end_date\",legend_persistence_x.\"batch_id_in\",legend_persistence_x.\"batch_id_out\" FROM " + + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"version\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",COALESCE(MIN(legend_persistence_y.\"validity_from_target\"),'9999-12-31 23:59:59') as \"legend_persistence_end_date\",legend_persistence_x.\"batch_id_in\",legend_persistence_x.\"batch_id_out\" " + "FROM " + tempWithDeleteIndicatorName + " as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON 
((legend_persistence_x.\"id\" = legend_persistence_y.\"id\") AND (legend_persistence_x.\"name\" = legend_persistence_y.\"name\")) AND (legend_persistence_y.\"validity_from_target\" > legend_persistence_x.\"validity_from_target\") AND (legend_persistence_y.\"delete_indicator\" = 0) " + "WHERE legend_persistence_x.\"delete_indicator\" = 0 " + - "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"digest\", legend_persistence_x.\"validity_from_target\", legend_persistence_x.\"batch_id_in\", legend_persistence_x.\"batch_id_out\") as legend_persistence_x " + + "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"version\", legend_persistence_x.\"digest\", legend_persistence_x.\"validity_from_target\", legend_persistence_x.\"batch_id_in\", legend_persistence_x.\"batch_id_out\") as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.\"id\" = legend_persistence_y.\"id\") AND (legend_persistence_x.\"name\" = legend_persistence_y.\"name\")) AND (legend_persistence_y.\"validity_through_target\" > legend_persistence_x.\"legend_persistence_start_date\") AND (legend_persistence_y.\"validity_through_target\" <= legend_persistence_x.\"legend_persistence_end_date\") AND (legend_persistence_y.\"delete_indicator\" <> 0) " + - "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"digest\", legend_persistence_x.\"legend_persistence_start_date\", legend_persistence_x.\"batch_id_in\", legend_persistence_x.\"batch_id_out\")"; + "GROUP BY legend_persistence_x.\"id\", legend_persistence_x.\"name\", legend_persistence_x.\"amount\", legend_persistence_x.\"version\", legend_persistence_x.\"digest\", legend_persistence_x.\"legend_persistence_start_date\", legend_persistence_x.\"batch_id_in\", 
legend_persistence_x.\"batch_id_out\")"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery, operations.get(0).preActionsSql().get(3)); @@ -1041,7 +1046,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedStageToTemp = "INSERT INTO \"mydb\".\"temp\" " + "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\"," + - "legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "legend_persistence_y.\"legend_persistence_end_date\",(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ 
-1063,7 +1068,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedMainToTemp = "INSERT INTO \"mydb\".\"temp\" " + "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\"," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM " + "(SELECT sink.\"id\",sink.\"name\",sink.\"amount\",sink.\"digest\",sink.\"batch_id_in\",sink.\"batch_id_out\",sink.\"batch_time_in\"," + "sink.\"batch_time_out\",sink.\"validity_from_target\",sink.\"validity_through_target\" FROM \"mydb\".\"main\" as sink " + @@ -1087,7 +1092,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedUpdateMain = "UPDATE \"mydb\".\"main\" as sink SET " + "sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (EXISTS " + "(SELECT * FROM \"mydb\".\"temp\" as temp WHERE ((sink.\"id\" = temp.\"id\") AND (sink.\"name\" = temp.\"name\")) " + "AND (sink.\"validity_from_target\" = temp.\"validity_from_target\"))) AND 
(sink.\"batch_id_out\" = 999999999)"; @@ -1122,7 +1127,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR String expectedStageToTemp = "INSERT INTO \"mydb\".\"temp\" " + "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\",legend_persistence_x.\"validity_from_reference\" as \"legend_persistence_start_date\"," + - "legend_persistence_y.\"legend_persistence_end_date\",'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "legend_persistence_y.\"legend_persistence_end_date\",'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"validity_from_reference\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1145,7 +1150,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT legend_persistence_x.\"id\",legend_persistence_x.\"name\",legend_persistence_x.\"amount\",legend_persistence_x.\"digest\"," + "legend_persistence_x.\"validity_from_target\" as \"legend_persistence_start_date\",legend_persistence_y.\"legend_persistence_end_date\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM (SELECT sink.\"id\",sink.\"name\",sink.\"amount\",sink.\"digest\",sink.\"batch_time_in\"," + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM (SELECT sink.\"id\",sink.\"name\",sink.\"amount\",sink.\"digest\",sink.\"batch_time_in\"," + "sink.\"batch_time_out\",sink.\"validity_from_target\",sink.\"validity_through_target\" " + "FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '9999-12-31 23:59:59') as 
legend_persistence_x " + "INNER JOIN " + @@ -1168,7 +1173,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "AND (legend_persistence_x.\"validity_from_target\" = legend_persistence_y.\"legend_persistence_start_date\"))"; String expectedUpdateMain = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (EXISTS (SELECT * FROM \"mydb\".\"temp\" as temp WHERE " + "((sink.\"id\" = temp.\"id\") AND (sink.\"name\" = temp.\"name\")) AND " + "(sink.\"validity_from_target\" = temp.\"validity_from_target\"))) AND (sink.\"batch_time_out\" = '9999-12-31 23:59:59')"; @@ -1190,8 +1195,8 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; - String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00'"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00') as \"rowsInserted\""; + String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000'"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') as \"rowsInserted\""; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 69799b67769..305698ea828 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -35,10 +35,9 @@ public class AppendOnlyTest extends AppendOnlyTestCases String rowsUpdated = "SELECT 0 as \"rowsUpdated\""; String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"staging\" as stage"; @Override - public void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations) + public void verifyAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecordsDeriveMainSchema(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -56,45 +55,36 @@ public void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyAllowDuplicatesWithAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - 
List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO \"mydb\".\"main\" " + - "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00' " + - "FROM \"mydb\".\"staging\" as stage)"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQueryWithAuditAndNoPKs, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertNull(operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); } @Override - public void verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(List generatorResults, List dataSplitRanges) + public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords(List generatorResults, List dataSplitRanges) { String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00' " + - "FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" 
<= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit, generatorResults.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, generatorResults.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates, generatorResults.get(0).deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), generatorResults.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), generatorResults.get(1).ingestSql().get(0)); Assertions.assertEquals(2, generatorResults.size()); // Stats - String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage " + - "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"staging\" as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_update_time\" = (SELECT MAX(sink.\"batch_update_time\") FROM \"mydb\".\"main\" as sink)"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), 
generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -105,70 +95,23 @@ public void verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage)"; - - Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyFailOnDuplicatesWithAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00' FROM \"mydb\".\"staging\" as stage)"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditNotPkCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyFilterDuplicatesNoAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO \"mydb\".\"main\" " + - "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage " + - "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" 
as sink " + - "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + - "(sink.\"digest\" = stage.\"digest\"))))"; - - Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); - Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); - Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); - Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); - } - - @Override - public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries) + public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRecords(GeneratorResult queries) { List preActionsSqlList = queries.preActionsSql(); List milestoningSqlList = queries.ingestSql(); + List deduplicationAndVersioningSql = queries.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00' FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000' FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + 
Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); List postActionsSql = queries.postActionsSql(); @@ -177,6 +120,7 @@ public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries assertIfListsAreSameIgnoringOrder(expectedSQL, postActionsSql); // Stats + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_update_time\" = (SELECT MAX(sink.\"batch_update_time\") FROM \"mydb\".\"main\" as sink)"; Assertions.assertEquals(incomingRecordCount, queries.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, queries.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); @@ -186,25 +130,29 @@ public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries } @Override - public void verifyAppendOnlyFilterDuplicatesWithAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExistingRecords(List operations, List dataSplitRanges) { String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00' " + - "FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000' " + + 
"FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"digest\" = stage.\"digest\")))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit, operations.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates, operations.get(0).deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage " + + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_update_time\" = (SELECT MAX(sink.\"batch_update_time\") FROM \"mydb\".\"main\" as sink)"; @@ -222,14 +170,13 @@ public 
void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); - String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\") " + - "(SELECT * FROM \"MYDB\".\"STAGING\" as stage " + - "WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink " + - "WHERE ((sink.\"ID\" = stage.\"ID\") " + - "AND (sink.\"NAME\" = stage.\"NAME\")) " + - "AND (sink.\"DIGEST\" = stage.\"DIGEST\"))))"; + String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" " + + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_UPDATE_TIME\") " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",'2000-01-01 00:00:00.000000' FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE NOT (EXISTS " + + "(SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" = stage.\"DIGEST\"))))"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQueryWithUpperCase, preActionsSqlList.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQueryUpperCase, preActionsSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); } @@ -239,24 +186,72 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); - String insertSql = "INSERT INTO \"mydb\".\"main\" " + - "(\"id\", \"name\", \"amount\", \"digest\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage " + - "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + - "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + - "(sink.\"digest\" = stage.\"digest\"))))"; + String insertSql = "INSERT INTO 
\"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"digest\", \"batch_update_time\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\",'2000-01-01 00:00:00.000000' FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; - Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); } - private void verifyStats(GeneratorResult operations) + @Override + public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecords(GeneratorResult operations) { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + + String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000' FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE ((sink.\"id\" = stage.\"id\") AND " + + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, 
deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; + String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_update_time\" = (SELECT MAX(sink.\"batch_update_time\") FROM \"mydb\".\"main\" as sink)"; Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + } + + @Override + public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + + String insertSql = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000' FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + 
Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; + String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_update_time\" = (SELECT MAX(sink.\"batch_update_time\") FROM \"mydb\".\"main\" as sink)"; + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index 948a3132866..7bc27355818 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; @@ -25,26 +26,36 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; -import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; -import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DUPLICATES; public class NontemporalDeltaTest extends NontemporalDeltaTestCases { protected String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; protected String incomingRecordCountWithSplits = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE " + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND 
(stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + protected String incomingRecordCountWithSplitsTempStagingTable = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + protected String incomingRecordCountWithSplitsWithDuplicates = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + "(stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + protected String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; protected String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - protected String rowsDeletedWithDeleteIndicator = "SELECT COUNT(*) as \"rowsDeleted\" FROM \"mydb\".\"main\" as sink WHERE EXISTS (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\") AND (stage.\"delete_indicator\" IN ('yes','1','true')))"; + protected String rowsDeletedWithDeleteIndicator = "SELECT COUNT(*) as \"rowsDeleted\" FROM \"mydb\".\"main\" as sink WHERE EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\") AND (stage.\"delete_indicator\" IN ('yes','1','true')))"; @Override - public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); List initializeLockSql = 
operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + @@ -57,7 +68,7 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; @@ -65,6 +76,9 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio Assertions.assertEquals(AnsiTestArtifacts.expectedStagingTableWithDigestCreateQuery, preActionsSqlList.get(1)); Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSqlList.get(2)); + Assertions.assertTrue(deduplicationAndVersioningSql.isEmpty()); + Assertions.assertTrue(deduplicationAndVersioningErrorChecksSql.isEmpty()); + Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); @@ -78,31 +92,37 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio } @Override - public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorResult operations) { List preActionsSqlList = 
operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + - "sink.\"batch_update_time\" = '2000-01-01 00:00:00' " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))"; + "SET sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" 
<> stage.\"digest\"))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + + "sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000' " + + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00' " + - "FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, 
milestoningSqlList.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertTrue(deduplicationAndVersioningErrorChecksSql.isEmpty()); // Stats Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); @@ -110,7 +130,44 @@ public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operat } @Override - public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) + { + String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" 
>= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) " + + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + + "(((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) " + + "AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; + + String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + + "AND (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, operations.get(0).preActionsSql().get(0)); + 
Assertions.assertEquals(expectedBaseTempStagingTablePlusDigestWithDataSplit, operations.get(0).preActionsSql().get(1)); + Assertions.assertEquals(enrichSqlWithDataSplits(updateSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndAllowDups, operations.get(0).deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, operations.get(0).deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + + Assertions.assertEquals(enrichSqlWithDataSplits(updateSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(1)); + + // Stats + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempStagingTable, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempStagingTable, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + + @Override + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List operations, List dataSplitRanges) { String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + "sink.\"id\" = (SELECT 
stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + @@ -131,9 +188,12 @@ public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List Assertions.assertEquals(enrichSqlWithDataSplits(updateSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(1)); + Assertions.assertTrue(operations.get(0).deduplicationAndVersioningSql().isEmpty()); + Assertions.assertTrue(operations.get(0).deduplicationAndVersioningErrorChecksSqlPlan().isEmpty()); + Assertions.assertEquals(enrichSqlWithDataSplits(updateSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(1)); - + // Stats Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -142,19 +202,19 @@ public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List } @Override - public void verifyNonTemporalDeltaWithWithAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND 
(sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + - "sink.\"batch_update_time\" = '2000-01-01 00:00:00' " + - "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; + "sink.\"id\" = (SELECT stage.\"id\" FROM 
\"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))," + + "sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000' " + + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE 
(((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00' FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000' FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; @@ -164,15 +224,21 @@ public void verifyNonTemporalDeltaWithWithAuditingWithDataSplit(List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -193,8 +259,8 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator(Gener "WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; String deleteSql = "DELETE FROM \"mydb\".\"main\" as sink " + - "WHERE EXISTS (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" " + - "FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + + "WHERE EXISTS " + + "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND (sink.\"digest\" = stage.\"digest\") AND (stage.\"delete_indicator\" IN ('yes','1','true')))"; 
Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); @@ -227,7 +293,7 @@ public void verifyNontemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati "AND (sink.\"DIGEST\" <> stage.\"DIGEST\"))"; String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\") " + - "(SELECT * FROM \"MYDB\".\"STAGING\" as stage WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\" FROM \"MYDB\".\"STAGING\" as stage WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink " + "WHERE (sink.\"ID\" = stage.\"ID\") " + "AND (sink.\"NAME\" = stage.\"NAME\"))))"; @@ -251,7 +317,7 @@ public void verifyNontemporalDeltaWithLessColumnsInStaging(GeneratorResult opera "((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"digest\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; @@ -284,7 +350,7 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-03')))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE (NOT (EXISTS " + + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage WHERE (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")))) AND ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-03')))"; @@ -300,31 +366,46 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult } @Override - public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(GeneratorResult operations) + public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"id\" = (SELECT stage.\"id\" FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) AND (stage.\"legend_persistence_row_num\" = 1))," + - "sink.\"name\" = (SELECT stage.\"name\" FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) AND (stage.\"legend_persistence_row_num\" = 1))," + - "sink.\"amount\" = (SELECT stage.\"amount\" FROM (SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) AND (stage.\"legend_persistence_row_num\" = 1))," + - "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) AND (stage.\"legend_persistence_row_num\" = 1))," + - "sink.\"digest\" = (SELECT stage.\"digest\" FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) AND (stage.\"legend_persistence_row_num\" = 1))," + - "sink.\"version\" = (SELECT stage.\"version\" FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) 
AND (stage.\"version\" > sink.\"version\")) AND (stage.\"legend_persistence_row_num\" = 1)) " + - "WHERE EXISTS (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) AND (stage.\"legend_persistence_row_num\" = 1))"; + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))," + + "sink.\"version\" = (SELECT stage.\"version\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE 
((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) " + + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\") " + - "(SELECT * FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" FROM " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage " + - "WHERE stage.\"snapshot_id\" > 18972) as stage " + - "WHERE stage.\"legend_persistence_row_num\" = 1) as stage " + - "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" FROM " + + "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" " + + "FROM (SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + "stage.\"legend_persistence_count\" as \"legend_persistence_count\"," + + "DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_rank\" " + + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",COUNT(*) as \"legend_persistence_count\" " + + "FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972 GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\", stage.\"version\") as stage) " + + "as stage WHERE stage.\"legend_persistence_rank\" = 1)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSqlList.get(1)); Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); + + Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972"; // Stats Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -333,7 +414,7 @@ public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(Ge } @Override - public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(GeneratorResult operations) + public void 
verifyNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -348,7 +429,7 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(Gene "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")) AND (stage.\"snapshot_id\" > 18972))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE (NOT (EXISTS " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" FROM \"mydb\".\"staging\" as stage WHERE (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")))) AND (stage.\"snapshot_id\" > 18972))"; @@ -356,6 +437,9 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(Gene Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertTrue(operations.deduplicationAndVersioningSql().isEmpty()); + Assertions.assertTrue(operations.deduplicationAndVersioningErrorChecksSql().isEmpty()); + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972"; // Stats Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -364,7 +448,7 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(Gene } @Override - public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations) + public void 
verifyNontemporalDeltaNoDedupMaxVersionWithoutPerform(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -379,13 +463,16 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\"))"; String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertTrue(operations.deduplicationAndVersioningSql().isEmpty()); + Assertions.assertTrue(operations.deduplicationAndVersioningErrorChecksSql().isEmpty()); + // Stats Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); @@ -393,29 +480,40 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( } @Override - public void verifyNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); 
List milestoningSqlList = operations.ingestSql(); - String updateSql = "UPDATE \"MYDB\".\"MAIN\" as sink SET " + - "sink.\"ID\" = (SELECT stage.\"ID\" FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE (((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) AND (stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1))," + - "sink.\"NAME\" = (SELECT stage.\"NAME\" FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE (((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) AND (stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1))," + - "sink.\"AMOUNT\" = (SELECT stage.\"AMOUNT\" FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE (((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) AND (stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1))," + - "sink.\"BIZ_DATE\" = (SELECT stage.\"BIZ_DATE\" FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE (((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = 
stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) AND (stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1))," + - "sink.\"DIGEST\" = (SELECT stage.\"DIGEST\" FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE (((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) AND (stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1))," + - "sink.\"VERSION\" = (SELECT stage.\"VERSION\" FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE (((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) AND (stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1)) " + - "WHERE EXISTS (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE (((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) AND (stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1))"; + String updateSql = "UPDATE \"MYDB\".\"MAIN\" as sink " + + "SET sink.\"ID\" = (SELECT stage.\"ID\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + 
"sink.\"NAME\" = (SELECT stage.\"NAME\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"AMOUNT\" = (SELECT stage.\"AMOUNT\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"BIZ_DATE\" = (SELECT stage.\"BIZ_DATE\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"DIGEST\" = (SELECT stage.\"DIGEST\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))," + + "sink.\"VERSION\" = (SELECT stage.\"VERSION\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")) " + + "WHERE EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\"))"; String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\") " + - "(SELECT * FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" FROM " + - "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage " + - "WHERE stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1) as stage " 
+ + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\"))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase, preActionsSqlList.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithVersionUpperCase, preActionsSqlList.get(1)); Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + + String insertTempStagingTable = "INSERT INTO \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\") " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" FROM " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + + "DENSE_RANK() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_RANK\" " + + "FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE stage.\"LEGEND_PERSISTENCE_RANK\" = 1)"; + + Assertions.assertEquals(expectedTempStagingCleanupQueryInUpperCase, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(insertTempStagingTable, operations.deduplicationAndVersioningSql().get(1)); + + Assertions.assertEquals(dataErrorCheckSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); } public RelationalSink getRelationalSink() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index 3771c827314..d1b1e403e56 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -25,30 +26,31 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; -import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; -import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { String cleanUpMainTableSql = "DELETE FROM 
\"mydb\".\"main\" as sink"; String cleanupMainTableSqlUpperCase = "DELETE FROM \"MYDB\".\"MAIN\" as sink"; String rowsDeleted = "SELECT COUNT(*) as \"rowsDeleted\" FROM \"mydb\".\"main\" as sink"; - String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; String rowsUpdated = "SELECT 0 as \"rowsUpdated\""; String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink"; String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; @Override - public void verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotNoAuditingNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); List initializeLockSql = operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map andVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage)"; + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\" FROM \"mydb\".\"staging\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedBaseStagingTableCreateQuery, preActionsSqlList.get(1)); @@ -58,68 +60,67 @@ public void verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult opera Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); + Assertions.assertTrue(deduplicationAndVersioningSql.isEmpty()); + Assertions.assertTrue(andVersioningErrorChecksSql.isEmpty()); // 
Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override - public void verifyNontemporalSnapshotNoAuditingWithDataSplit(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\" FROM \"mydb\".\"staging\" as stage " + - "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage_right WHERE " + - "(stage.\"data_split\" < stage_right.\"data_split\") AND ((stage.\"id\" = stage_right.\"id\") AND (stage.\"name\" = stage_right.\"name\")))))"; - - Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(cleanUpMainTableSql, milestoningSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyNontemporalSnapshotWithAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00' " + - "FROM \"mydb\".\"staging\" as stage)"; + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00.000000' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; 
Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTableWithCount, preActionsSqlList.get(1)); Assertions.assertEquals(cleanUpMainTableSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertTrue(deduplicationAndVersioningErrorChecksSql.isEmpty()); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override - public void verifyNontemporalSnapshotWithAuditingWithDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String insertSql = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00.000000' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage)"; - String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"batch_update_time\") " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",'2000-01-01 00:00:00' " + - "FROM \"mydb\".\"staging\" as stage WHERE NOT (EXISTS " + - "(SELECT * FROM \"mydb\".\"staging\" as stage_right " + - "WHERE (stage.\"data_split\" < stage_right.\"data_split\") AND ((stage.\"id\" = 
stage_right.\"id\") AND " + - "(stage.\"name\" = stage_right.\"name\")))))"; + String maxDataErrorCheckSql = "SELECT MAX(\"legend_persistence_distinct_rows\") as \"MAX_DATA_ERRORS\" FROM " + + "(SELECT COUNT(DISTINCT(\"amount\")) as \"legend_persistence_distinct_rows\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "as stage GROUP BY \"id\", \"name\", \"biz_date\") as stage"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTableWithCount, preActionsSqlList.get(1)); Assertions.assertEquals(cleanUpMainTableSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override @@ -129,7 +130,7 @@ public void verifyNontemporalSnapshotWithUpperCaseOptimizer(GeneratorResult quer List milestoningSqlList = queries.ingestSql(); String insertSql = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\") " + - "(SELECT * FROM \"MYDB\".\"STAGING\" as stage)"; + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\" FROM \"MYDB\".\"STAGING\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQueryWithUpperCase, preActionsSqlList.get(0)); Assertions.assertEquals(cleanupMainTableSqlUpperCase, milestoningSqlList.get(0)); @@ 
-143,7 +144,7 @@ public void verifyNontemporalSnapshotWithLessColumnsInStaging(GeneratorResult op List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\") " + - "(SELECT * FROM \"mydb\".\"staging\" as stage)"; + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\" FROM \"mydb\".\"staging\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(cleanUpMainTableSql, milestoningSqlList.get(0)); @@ -160,9 +161,9 @@ public void verifyNontemporalSnapshotWithCleanStagingData(GeneratorResult operat } @Override - public void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostActions) + public void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostCleanup) { - List sqlsForPostActions = physicalPlanForPostActions.getSqlList(); + List sqlsForPostActions = physicalPlanForPostCleanup.getSqlList(); List expectedSQL = new ArrayList<>(); expectedSQL.add(AnsiTestArtifacts.expectedDropTableQuery); assertIfListsAreSameIgnoringOrder(expectedSQL, sqlsForPostActions); @@ -174,12 +175,13 @@ public RelationalSink getRelationalSink() return AnsiSqlSink.get(); } - private void verifyStats(GeneratorResult operations) + private void verifyStats(GeneratorResult operations, String stageTableName) { // Pre stats: Assertions.assertEquals(rowsDeleted, operations.preIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); // Post Stats: + String incomingRecordCount = String.format("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"%s\" as stage", stageTableName); Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); Assertions.assertEquals(rowsInserted, 
operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java index 8470cdbeb0f..f95df9e8b3d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; @@ -23,13 +24,15 @@ import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; public class UnitemporalDeltaBatchIdBasedTest extends 
UnitmemporalDeltaBatchIdBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -75,7 +78,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupAllVersionsWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -106,6 +109,9 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; @@ -116,17 +122,19 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET sink.\"batch_id_out\" = " + "(SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE " + "(sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -134,7 +142,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999 FROM \"mydb\".\"staging\" as stage " + + "999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))) AND " + @@ -146,6 +154,16 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"delete_indicator\", \"legend_persistence_count\") " + + "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"delete_indicator\"," + + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\", stage.\"delete_indicator\")"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertTrue(deduplicationAndVersioningErrorChecksSql.isEmpty()); + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; @@ -155,13 +173,13 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET sink.\"batch_id_out\" = " + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE " + 
"(sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -169,7 +187,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + @@ -187,7 +205,17 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndAllowDups = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"delete_indicator\", \"data_split\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"delete_indicator\"," + + "DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"biz_date\" ASC) as \"data_split\" " + + "FROM \"mydb\".\"staging\" as stage)"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndAllowDups, operations.get(0).deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, 
operations.get(0).deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as \"rowsInserted\""; @@ -270,7 +298,7 @@ public void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult } @Override - public void 
verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(GeneratorResult operations) + public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -279,29 +307,36 @@ public void verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(Gen "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 " + "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (EXISTS " + - "(SELECT * FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" " + - "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() " + - "OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" " + - "FROM \"mydb\".\"staging\" as stage WHERE stage.\"batch_id_in\" > 5) as stage " + - "WHERE stage.\"legend_persistence_row_num\" = 1) as stage " + + "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")))"; - String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", " + - "\"digest\", \"version\", \"batch_id_in\", \"batch_id_out\") " + + String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 " + - "FROM batch_metadata as batch_metadata WHERE 
UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + - "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" " + - "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + - "ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) " + - "as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"batch_id_in\" > 5) as stage " + - "WHERE stage.\"legend_persistence_row_num\" = 1) as stage " + - "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + - "WHERE (sink.\"batch_id_out\" = 999999999) AND (stage.\"version\" <= sink.\"version\") " + - "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "as stage WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + 
"stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + "stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER " + + "(PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_rank\" " + + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + + "stage.\"version\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + + "WHERE stage.\"batch_id_in\" > 5 GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", " + + "stage.\"digest\", stage.\"version\") as stage) as stage WHERE stage.\"legend_persistence_rank\" = 1)"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -309,7 +344,7 @@ public void verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(Gen } @Override - public void verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(GeneratorResult operations) + public void verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -347,7 +382,7 @@ public void verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(Generato } @Override - public void 
verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations) + public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -355,7 +390,7 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(stage.\"version\" > sink.\"version\")))"; @@ -364,13 +399,24 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + "999999999 " + - "FROM \"mydb\".\"staging\" as stage " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + 
Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); + + String expectedInsertIntoBaseTempStagingWithFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage GROUP BY stage.\"id\", " + + "stage.\"name\", stage.\"amount\", stage.\"biz_date\", stage.\"digest\", stage.\"version\")"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithFilterDuplicates, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -378,37 +424,39 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( } @Override - public void verifyUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations) + public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = " + - "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE 
UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1 " + - "WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND " + - "(EXISTS (SELECT * FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" " + - "FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + - "ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" " + - "FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1) as stage " + - "WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")))"; + String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink " + + "SET sink.\"BATCH_ID_OUT\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1 WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND " + + "(EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage WHERE " + + "((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (stage.\"VERSION\" >= sink.\"VERSION\")))"; - String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", " + - "\"VERSION\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\") " + + String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" " + + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999 FROM " + - "(SELECT 
stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" " + - "FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + - "ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) " + - "as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage " + - "WHERE stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1) as stage WHERE NOT " + - "(EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink " + - "WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (stage.\"VERSION\" < sink.\"VERSION\") " + - "AND ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")))))"; + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999 FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND " + + "(stage.\"VERSION\" < sink.\"VERSION\") AND ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithVersionUpperCase, preActionsSql.get(2)); + + String expectedInsertIntoTempStagingMaxVersion = "INSERT INTO \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" " + + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"VERSION\") " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" " + + "FROM (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\"," + + "DENSE_RANK() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_RANK\" " + + 
"FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE stage.\"LEGEND_PERSISTENCE_RANK\" = 1)"; + + Assertions.assertEquals(expectedTempStagingCleanupQueryInUpperCase, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoTempStagingMaxVersion, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(dataErrorCheckSqlUpperCase, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -416,7 +464,7 @@ public void verifyUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWitho } @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFilters(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndWithOptimizationFilters(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -457,7 +505,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFilters(G } @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFiltersIncludesNullValues(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndWithOptimizationFiltersIncludesNullValues(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -529,7 +577,7 @@ protected String getExpectedMetadataTableIngestQueryWithStagingFilters(String st "(\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"staging_filters\") " + "(SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE'," + + "'2000-01-01 
00:00:00.000000',CURRENT_TIMESTAMP(),'DONE'," + String.format("PARSE_JSON('%s'))", stagingFilters); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 505b9893733..55890efccd2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -25,10 +25,14 @@ import java.util.ArrayList; import java.util.List; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DUPLICATES; + public class UnitemporalDeltaBatchIdDateTimeBasedTest extends UnitmemporalDeltaBatchIdDateTimeBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -36,7 +40,7 @@ public void 
verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + @@ -46,7 +50,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + @@ -69,13 +73,13 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - 
"sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"digest\" <> stage.\"digest\")))"; @@ -83,8 +87,8 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + @@ -92,6 +96,7 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 
'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) as \"rowsInserted\""; @@ -112,7 +126,7 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -120,7 +134,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET sink.\"batch_id_out\" = " + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.\"batch_id_out\" = 999999999) AND " + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + @@ -132,7 +146,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator "\"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + "WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))) AND " + @@ -155,7 +169,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator } @Override - public void 
verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalDeltaWithDeleteInd(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -163,7 +177,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET sink.\"batch_id_out\" = " + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.\"batch_id_out\" = 999999999) AND " + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + @@ -175,7 +189,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "\"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + "WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))) AND " + @@ -190,13 +204,13 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List operations, List dataSplitRanges) 
{ String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + "sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE " + "((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -204,7 +218,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(sink.\"digest\" = stage.\"digest\") AND ((sink.\"id\" = stage.\"id\") AND " + @@ -213,6 +227,20 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1,sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00' WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (EXISTS (SELECT * FROM \"MYDB\".\"STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" <> 
stage.\"DIGEST\")))"; - String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING\" as stage WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (sink.\"DIGEST\" = stage.\"DIGEST\") AND ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")))))"; + String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1,sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00.000000' WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (EXISTS (SELECT * FROM \"MYDB\".\"STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" <> stage.\"DIGEST\")))"; + String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING\" as stage WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (sink.\"DIGEST\" = stage.\"DIGEST\") AND ((sink.\"ID\" = stage.\"ID\") AND 
(sink.\"NAME\" = stage.\"NAME\")))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); @@ -255,7 +283,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE " + "((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")))"; @@ -264,7 +292,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera "(\"id\", \"name\", \"amount\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"digest\" = stage.\"digest\") " + @@ -330,7 +358,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) String expectedMilestoneQuery = "UPDATE \"my_schema\".\"main\" as sink " + "SET 
sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(EXISTS (SELECT * FROM \"my_schema\".\"staging\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + @@ -340,7 +368,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"my_schema\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"my_schema\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + @@ -375,7 +403,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati String expectedMilestoneQuery = "UPDATE \"mydb\".\"my_schema\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(EXISTS (SELECT * FROM \"mydb\".\"my_schema\".\"staging\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + @@ -385,7 +413,7 @@ public void 
verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"my_schema\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"my_schema\".\"main\" as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + @@ -420,7 +448,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper String expectedMilestoneQuery = "UPDATE main as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1," + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + "(EXISTS (SELECT * FROM staging as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + @@ -430,7 +458,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 
00:00:00.000000','9999-12-31 23:59:59' " + "FROM staging as stage " + "WHERE NOT (EXISTS (SELECT * FROM main as sink " + "WHERE (sink.\"batch_id_out\" = 999999999) " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDateTimeBasedTest.java index 148bc47d4d1..e2891b503f3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDateTimeBasedTest.java @@ -25,17 +25,22 @@ import java.util.ArrayList; import java.util.List; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.maxDupsErrorCheckSql; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DUPLICATES; + public class UnitemporalDeltaDateTimeBasedTest extends UnitmemporalDeltaDateTimeBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void 
verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND " + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + @@ -44,7 +49,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + @@ -59,29 +64,29 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio // Stats String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; - String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00'"; + String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000'"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM 
\"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00') as \"rowsInserted\""; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') as \"rowsInserted\""; String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + "(sink.\"digest\" <> stage.\"digest\")))"; String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + - "FROM \"mydb\".\"staging\" as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((stage.\"data_split\" >= 
'{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + @@ -89,6 +94,7 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00'"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000'"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00') as \"rowsInserted\""; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') as \"rowsInserted\""; String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; verifyStats(operations.get(0), enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @Override - public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult operations) + public void 
verifyUnitemporalDeltaWithDeleteIndNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND " + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + @@ -126,7 +142,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", " + "\"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + "WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND (sink.\"digest\" = stage.\"digest\") " + "AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\"))))) AND " + @@ -141,21 +157,21 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper // Stats String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; - String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))"; + String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND 
(EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsInserted\""; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsTerminated\""; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsInserted\""; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND 
(sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsTerminated\""; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND " + - "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE ((stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) " + "AND ((sink.\"digest\" <> stage.\"digest\") OR (stage.\"delete_indicator\" IN ('yes','1','true')))))"; @@ -163,7 +179,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND (sink.\"digest\" = stage.\"digest\") " + @@ -173,6 +189,19 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND 
(sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsInserted\""; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsTerminated\""; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE 
((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsInserted\""; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsTerminated\""; verifyStats(operations.get(0), enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -197,9 +226,9 @@ public void verifyUnitemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00' WHERE (sink.\"BATCH_TIME_OUT\" = '9999-12-31 23:59:59') AND (EXISTS (SELECT * FROM \"MYDB\".\"STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" <> stage.\"DIGEST\")))"; + String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00.000000' WHERE (sink.\"BATCH_TIME_OUT\" = '9999-12-31 23:59:59') AND (EXISTS (SELECT * FROM \"MYDB\".\"STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" <> stage.\"DIGEST\")))"; - String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM 
\"MYDB\".\"STAGING\" as stage WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"BATCH_TIME_OUT\" = '9999-12-31 23:59:59') AND (sink.\"DIGEST\" = stage.\"DIGEST\") AND ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")))))"; + String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING\" as stage WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN\" as sink WHERE (sink.\"BATCH_TIME_OUT\" = '9999-12-31 23:59:59') AND (sink.\"DIGEST\" = stage.\"DIGEST\") AND ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")))))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableTimeBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java index b30f182ed29..b79aeb6c903 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; @@ -22,6 +23,7 @@ import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; @@ -35,13 +37,15 @@ public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBat String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1)-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as 
\"rowsTerminated\""; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); List initializeLockSql = operations.initializeLockSql(); List acquireLockSql = operations.acquireLockSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -61,6 +65,52 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(2)); Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSql.get(3)); + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + Assertions.assertTrue(deduplicationAndVersioningSql.isEmpty()); + Assertions.assertTrue(deduplicationAndVersioningErrorChecksSql.isEmpty()); + + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); + + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + } + + @Override + public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(GeneratorResult operations) + { + List preActionsSql = 
operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + + "WHERE (sink.\"batch_id_out\" = 999999999) " + + "AND (NOT (EXISTS " + + "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + + String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedStagingTableWithDigestCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(2)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, 
preActionsSql.get(3)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSql.get(4)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); @@ -101,7 +151,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -131,7 +181,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index b9fed006365..65111b98915 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -15,14 +15,15 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; -import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalSnapshotBatchIdBasedTestCases; import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalSnapshotBatchIdDateTimeBasedTestCases; import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSnapshotBatchIdDateTimeBasedTestCases { @@ -33,14 +34,14 @@ public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSna String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE 
sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1)-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as \"rowsTerminated\""; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -48,7 +49,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul 
String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; @@ -62,6 +63,41 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + + "WHERE (sink.\"batch_id_out\" = 999999999) " + + "AND (NOT (EXISTS " + + "(SELECT * FROM 
\"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + + String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigest, preActionsSql.get(2)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, 
rowsTerminated); + } + @Override public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations) { @@ -69,7 +105,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBa List milestoningSql = operations.ingestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE sink.\"batch_id_out\" = 999999999"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -78,16 +114,32 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBa } @Override - public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - - String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1,sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00' WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (NOT (EXISTS (SELECT * FROM \"MYDB\".\"STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" = 
stage.\"DIGEST\"))))"; - String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" (\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") (SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING\" as stage WHERE NOT (stage.\"DIGEST\" IN (SELECT sink.\"DIGEST\" FROM \"MYDB\".\"MAIN\" as sink WHERE sink.\"BATCH_ID_OUT\" = 999999999)))"; + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET sink.\"BATCH_ID_OUT\" = " + + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE " + + "UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN')-1,sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00.000000' " + + "WHERE (sink.\"BATCH_ID_OUT\" = 999999999) AND (NOT (EXISTS (SELECT * FROM \"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE ((sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" = stage.\"DIGEST\"))))"; + String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" " + + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_ID_IN\", \"BATCH_ID_OUT\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") " + + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\"," + + "(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + + "WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM 
\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + + "WHERE NOT (stage.\"DIGEST\" IN (SELECT sink.\"DIGEST\" FROM \"MYDB\".\"MAIN\" as sink WHERE sink.\"BATCH_ID_OUT\" = 999999999)))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCountUpperCase, preActionsSql.get(2)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateAsVersionUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -95,14 +147,14 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_id_out\" = (SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\")))) " + @@ -111,7 +163,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"biz_date\" = stage.\"biz_date\"))))"; @@ -138,14 +190,14 @@ public void verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(G } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = 
"UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\")))) " + @@ -154,7 +206,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) AND (sink.\"biz_date\" IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; @@ -174,7 +226,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmp List metadataIngestSql = operations.metadataIngestSql(); String 
expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (sink.\"biz_date\" IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; @@ -200,7 +252,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_id_out\" = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; @@ -208,7 +260,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"digest\", \"batch_id_in\", \"batch_id_out\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"digest\"," + - "(SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = 999999999)))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java index acec611ab3a..4db9bb8ae4d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java @@ -15,33 +15,36 @@ package org.finos.legend.engine.persistence.components.ingestmode.unitemporal; import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import 
org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; -import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalSnapshotBatchIdBasedTestCases; import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalSnapshotDateTimeBasedTestCases; import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; - String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))"; + String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsInserted\""; - String 
rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00')))) as \"rowsTerminated\""; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_in\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsInserted\""; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\")) AND (sink2.\"batch_time_in\" = '2000-01-01 00:00:00.000000')))) as \"rowsTerminated\""; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - 
"SET sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + "AND (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"staging\" as stage " + @@ -50,12 +53,53 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '9999-12-31 23:59:59')))"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + Assertions.assertTrue(deduplicationAndVersioningSql.isEmpty()); + Assertions.assertTrue(deduplicationAndVersioningErrorChecksSql.isEmpty()); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + } + + @Override + public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map 
deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + + "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + + "AND (NOT (EXISTS " + + "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\"))))"; + + String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_time_out\" = '9999-12-31 23:59:59')))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSql.get(2)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(AnsiTestArtifacts.expectedInsertIntoBaseTempStagingWithFilterDupsAndMaxVersion, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(AnsiTestArtifacts.dataErrorCheckSqlWithBizDateVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); 
Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -71,7 +115,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandli List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE sink.\"batch_time_out\" = '9999-12-31 23:59:59'"; Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); @@ -89,7 +133,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink SET " + - "sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00' " + + "sink.\"BATCH_TIME_OUT\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"BATCH_TIME_OUT\" = '9999-12-31 23:59:59') AND " + "(NOT (EXISTS (SELECT * FROM \"MYDB\".\"STAGING\" as stage WHERE ((sink.\"ID\" = stage.\"ID\") " + "AND (sink.\"NAME\" = stage.\"NAME\")) AND (sink.\"DIGEST\" = stage.\"DIGEST\"))))"; @@ -97,7 +141,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene String expectedUpsertQuery = "INSERT INTO \"MYDB\".\"MAIN\" " + "(\"ID\", \"NAME\", \"AMOUNT\", \"BIZ_DATE\", \"DIGEST\", \"BATCH_TIME_IN\", \"BATCH_TIME_OUT\") " + "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING\" as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"MYDB\".\"STAGING\" as stage " + "WHERE NOT (stage.\"DIGEST\" IN (SELECT sink.\"DIGEST\" FROM \"MYDB\".\"MAIN\" as sink " + "WHERE sink.\"BATCH_TIME_OUT\" = '9999-12-31 23:59:59')))"; @@ -110,14 +154,14 @@ public void 
verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "SET sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') " + "AND (NOT (EXISTS " + "(SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\")))) " + @@ -126,7 +170,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND (sink.\"biz_date\" = stage.\"biz_date\"))))"; @@ -140,14 +184,14 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List 
metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink SET " + - "sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "sink.\"batch_time_out\" = '2000-01-01 00:00:00.000000' " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND " + "(NOT (EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND " + "(sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" = stage.\"digest\")))) AND " + @@ -156,7 +200,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_time_in\", \"batch_time_out\") " + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM \"mydb\".\"staging\" as stage " + "WHERE NOT (stage.\"digest\" IN (SELECT sink.\"digest\" FROM \"mydb\".\"main\" as sink " + "WHERE (sink.\"batch_time_out\" = '9999-12-31 23:59:59') AND " + "(sink.\"biz_date\" IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java index 2a6d727b17f..c0177c01732 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java 
+++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java @@ -24,13 +24,13 @@ public String getExpectedSqlForMetadata() { return "INSERT INTO bulk_load_batch_metadata " + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\")" + - " (SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; + " (SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name','2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; } public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"TABLE_NAME\") = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"TABLE_NAME\") = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink 
getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DatasetDeduplicationHandlerTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DatasetDeduplicationHandlerTest.java new file mode 100644 index 00000000000..6e73d57a9b8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DatasetDeduplicationHandlerTest.java @@ -0,0 +1,81 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.IngestModeTest; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicationHandler; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.relational.SqlPlan; +import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; +import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; +import org.finos.legend.engine.persistence.components.transformer.TransformOptions; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import java.util.List; + +public class DatasetDeduplicationHandlerTest extends IngestModeTest +{ + private final TransformOptions transformOptions = TransformOptions.builder().build(); + Dataset stagingDataset = DatasetDefinition.builder() + .database("my_db") + .group("my_schema") + .name("my_table") + .alias("stage") + .schema(baseTableSchemaWithVersion) + .build(); + + String expectedSql = "SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\",COUNT(*) as \"legend_persistence_count\" " + + "FROM \"my_db\".\"my_schema\".\"my_table\" as stage " + + "GROUP BY stage.\"id\", stage.\"name\", stage.\"version\", stage.\"biz_date\""; + + @Test + public void 
testDatasetDeduplicationFailOnDuplicates() + { + Dataset dedupedDataset = FailOnDuplicates.builder().build().accept(new DatasetDeduplicationHandler(stagingDataset)); + Selection dedupedSelection = (Selection) dedupedDataset; + RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(dedupedSelection).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + Assertions.assertEquals(expectedSql, list.get(0)); + } + + @Test + public void testDatasetDeduplicationFilterDuplicates() + { + Dataset dedupedDataset = FilterDuplicates.builder().build().accept(new DatasetDeduplicationHandler(stagingDataset)); + Selection dedupedSelection = (Selection) dedupedDataset; + RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(dedupedSelection).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + Assertions.assertEquals(expectedSql, list.get(0)); + } + + @Test + public void testDatasetDeduplicationAllowDuplicates() + { + Dataset dedupedDataset = AllowDuplicates.builder().build().accept(new DatasetDeduplicationHandler(stagingDataset)); + Assertions.assertTrue(dedupedDataset instanceof DatasetDefinition); + DatasetDefinition dedupedDatasetDef = (DatasetDefinition) dedupedDataset; + Assertions.assertEquals(dedupedDatasetDef, stagingDataset); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DatasetVersioningHandlerTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DatasetVersioningHandlerTest.java new file mode 100644 index 00000000000..58255358e61 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/DatasetVersioningHandlerTest.java @@ -0,0 +1,119 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.IngestModeTest; +import org.finos.legend.engine.persistence.components.common.DatasetFilter; +import org.finos.legend.engine.persistence.components.common.FilterType; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.*; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.relational.SqlPlan; +import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; +import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; +import org.finos.legend.engine.persistence.components.transformer.TransformOptions; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; + +public class DatasetVersioningHandlerTest extends IngestModeTest +{ + private final TransformOptions transformOptions = TransformOptions.builder().build(); + Dataset stagingDataset = DatasetDefinition.builder() + .database("my_db") + .group("my_schema") + .name("my_table") + .alias("stage") + .schema(baseTableSchemaWithVersion) + .build(); + + Dataset derivedStagingDataset = DerivedDataset.builder() + .database("my_db") + .group("my_schema") + .name("my_table") + .alias("stage") + .schema(baseTableSchemaWithVersion) + .addDatasetFilters(DatasetFilter.of("bizDate", FilterType.EQUAL_TO, "2020-01-01")) + .build(); + + List 
primaryKeys = Arrays.asList("id", "name"); + + @Test + public void testVersioningHandlerNoVersioningStrategy() + { + Dataset versionedDataset = NoVersioningStrategy.builder().build().accept(new DatasetVersioningHandler(stagingDataset, primaryKeys)); + Assertions.assertTrue(versionedDataset instanceof DatasetDefinition); + DatasetDefinition versionedDatasetDef = (DatasetDefinition) versionedDataset; + Assertions.assertEquals(versionedDatasetDef, stagingDataset); + } + + @Test + public void testVersioningHandlerMaxVersionStrategy() + { + Dataset versionedDataset = MaxVersionStrategy.builder().versioningField("version").build().accept(new DatasetVersioningHandler(stagingDataset, primaryKeys)); + Selection versionedSelection = (Selection) versionedDataset; + RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(versionedSelection).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + String expectedSql = "SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\" " + + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\",DENSE_RANK() OVER " + + "(PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_rank\" " + + "FROM \"my_db\".\"my_schema\".\"my_table\" as stage) as stage WHERE stage.\"legend_persistence_rank\" = 1"; + Assertions.assertEquals(expectedSql, list.get(0)); + } + + @Test + public void testVersioningHandlerAllVersionsStrategy() + { + Dataset versionedDataset = AllVersionsStrategy.builder().versioningField("version").build().accept(new DatasetVersioningHandler(stagingDataset, primaryKeys)); + Selection versionedSelection = (Selection) versionedDataset; + RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); + LogicalPlan logicalPlan = 
LogicalPlan.builder().addOps(versionedSelection).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + String expectedSql = "SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\"," + + "DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" ASC) as \"legend_persistence_data_split\" " + + "FROM \"my_db\".\"my_schema\".\"my_table\" as stage"; + Assertions.assertEquals(expectedSql, list.get(0)); + } + + @Test + public void testVersioningHandlerWithDeduplicationHandler() + { + DeduplicationStrategy deduplicationStrategy = FailOnDuplicates.builder().build(); + VersioningStrategy versioningStrategy = AllVersionsStrategy.builder().versioningField("version").build(); + Dataset dedupAndVersionedDataset = LogicalPlanUtils.getDedupedAndVersionedDataset(deduplicationStrategy, versioningStrategy, derivedStagingDataset, primaryKeys); + + Selection versionedSelection = (Selection) dedupAndVersionedDataset; + RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(versionedSelection).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + String expectedSql = "SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\"," + + "stage.\"legend_persistence_count\" as \"legend_persistence_count\"," + + "DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" ASC) as \"legend_persistence_data_split\" " + + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\"," + + "COUNT(*) as \"legend_persistence_count\" FROM \"my_db\".\"my_schema\".\"my_table\" as stage WHERE stage.\"bizDate\" = '2020-01-01' " + + "GROUP BY stage.\"id\", stage.\"name\", stage.\"version\", stage.\"biz_date\") as stage"; + Assertions.assertEquals(expectedSql, 
list.get(0)); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java index 23007edbd49..254b721fa3b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java @@ -49,7 +49,7 @@ public void testInitializeLockInfo() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); String expectedSql = "INSERT INTO main_table_lock (\"insert_ts_utc\", \"table_name\") " + - "(SELECT '2000-01-01 00:00:00','main' WHERE NOT (EXISTS (SELECT * FROM main_table_lock as main_table_lock)))"; + "(SELECT '2000-01-01 00:00:00.000000','main' WHERE NOT (EXISTS (SELECT * FROM main_table_lock as main_table_lock)))"; Assertions.assertEquals(expectedSql, list.get(0)); } @@ -62,7 +62,7 @@ public void testUpdateMetaStore() LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = "UPDATE main_table_lock as main_table_lock SET main_table_lock.\"last_used_ts_utc\" = '2000-01-01 00:00:00'"; + String expectedSql = "UPDATE main_table_lock as main_table_lock SET main_table_lock.\"last_used_ts_utc\" = '2000-01-01 00:00:00.000000'"; 
Assertions.assertEquals(expectedSql, list.get(0)); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtilsTest.java index afc8c688070..fd0bd3c28d4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtilsTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtilsTest.java @@ -17,9 +17,10 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import org.finos.legend.engine.persistence.components.IngestModeTest; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DatasetDeduplicator; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionColumnBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionComparator; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersioningStrategy; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -38,71 +39,8 @@ import java.util.Map; import com.fasterxml.jackson.core.JsonProcessingException; -import static org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningComparator.GREATER_THAN; - public class LogicalPlanUtilsTest extends IngestModeTest { - - @Test - public void testDeduplicateByMaxVersion() - { - DatasetDefinition dataset = DatasetDefinition.builder() - .database("my_db") - .group("my_schema") - .name("my_table") - .alias("stage") - .schema(baseTableSchemaWithVersion) - .build(); - - RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get()); - - List primaryKeys = Arrays.asList("id", "name"); - VersioningStrategy versioningStrategy = MaxVersionStrategy.builder().versioningField("version").performDeduplication(true).versioningComparator(GREATER_THAN).build(); - Selection selection = (Selection) versioningStrategy.accept(new DatasetDeduplicator(dataset, primaryKeys)); - LogicalPlan logicalPlan = LogicalPlan.builder().addOps(selection).build(); - SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); - List list = physicalPlan.getSqlList(); - - String expectedSelectQuery = "(SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\" FROM " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\"," + - "ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" " + - "FROM \"my_db\".\"my_schema\".\"my_table\" as stage) as stage " + - "WHERE stage.\"legend_persistence_row_num\" = 1) as stage"; - Assertions.assertEquals(expectedSelectQuery, list.get(0)); - } - - @Test - public void testDeduplicateByMaxVersionAndFilterDataset() - { - RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get()); - List 
primaryKeys = Arrays.asList("id", "name"); - - Dataset dataset = DerivedDataset.builder() - .database("my_db") - .group("my_schema") - .name("my_table") - .alias("stage") - .schema(baseTableSchemaWithVersion) - .addDatasetFilters(DatasetFilter.of("biz_date", FilterType.GREATER_THAN, "2020-01-01")) - .addDatasetFilters(DatasetFilter.of("biz_date", FilterType.LESS_THAN, "2020-01-03")) - .build(); - - VersioningStrategy versioningStrategy = MaxVersionStrategy.builder().versioningField("version").performDeduplication(true).versioningComparator(GREATER_THAN).build(); - Selection selection = (Selection) versioningStrategy.accept(new DatasetDeduplicator(dataset, primaryKeys)); - - LogicalPlan logicalPlan = LogicalPlan.builder().addOps(selection).build(); - SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); - List list = physicalPlan.getSqlList(); - - String expectedSelectQuery = "(SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\" FROM " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"version\",stage.\"biz_date\",ROW_NUMBER() OVER " + - "(PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" " + - "FROM \"my_db\".\"my_schema\".\"my_table\" as stage " + - "WHERE (stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-03')) as stage " + - "WHERE stage.\"legend_persistence_row_num\" = 1) as stage"; - Assertions.assertEquals(expectedSelectQuery, list.get(0)); - } - @Test public void testJsonifyDatasetFilters() throws JsonProcessingException { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataUtilsTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataUtilsTest.java index 85e14ea4ee2..97defb2c26e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataUtilsTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataUtilsTest.java @@ -143,7 +143,7 @@ public void testInsertMetaStore() LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = "INSERT INTO " + lowerCaseTableName() + " (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\") (SELECT 'main',(SELECT COALESCE(MAX(" + lowerCaseTableName() + ".\"table_batch_id\"),0)+1 FROM " + lowerCaseTableName() + " as " + lowerCaseTableName() + " WHERE UPPER(" + lowerCaseTableName() + ".\"table_name\") = 'MAIN'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + String expectedSql = "INSERT INTO " + lowerCaseTableName() + " (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\") (SELECT 'main',(SELECT COALESCE(MAX(" + lowerCaseTableName() + ".\"table_batch_id\"),0)+1 FROM " + lowerCaseTableName() + " as " + lowerCaseTableName() + " WHERE UPPER(" + lowerCaseTableName() + ".\"table_name\") = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; Assertions.assertEquals(expectedSql, list.get(0)); } @@ -158,7 +158,7 @@ public void testInsertMetaStoreWithUpperCase() SqlPlan physicalPlan = 
transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = "INSERT INTO " + upperCaseTableName() + " (\"TABLE_NAME\", \"TABLE_BATCH_ID\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\") (SELECT 'main',(SELECT COALESCE(MAX(" + lowerCaseTableName() + ".\"TABLE_BATCH_ID\"),0)+1 FROM " + upperCaseTableName() + " as " + lowerCaseTableName() + " WHERE UPPER(" + lowerCaseTableName() + ".\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + String expectedSql = "INSERT INTO " + upperCaseTableName() + " (\"TABLE_NAME\", \"TABLE_BATCH_ID\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\") (SELECT 'main',(SELECT COALESCE(MAX(" + lowerCaseTableName() + ".\"TABLE_BATCH_ID\"),0)+1 FROM " + upperCaseTableName() + " as " + lowerCaseTableName() + " WHERE UPPER(" + lowerCaseTableName() + ".\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; Assertions.assertEquals(expectedSql, list.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java index f212f81aa25..b4655ac3a60 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java @@ -156,7 +156,7 @@ protected 
IngestorResult ingestViaExecutorAndVerifyStagingFilters(IngestMode ing // Load csv data loadData(path, datasets.stagingDataset(), 1); RelationalConnection connection = BigQueryConnection.of(getBigQueryConnection()); - IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets); + IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets).get(0); verifyStagingFilters(ingestor, connection, datasets); return ingestorResult; @@ -383,7 +383,7 @@ public IngestorResult executePlansAndVerifyForCaseConversion(IngestMode ingestMo .caseConversion(CaseConversion.TO_UPPER) .build(); - IngestorResult result = ingestor.performFullIngestion(BigQueryConnection.of(getBigQueryConnection()), datasets); + IngestorResult result = ingestor.performFullIngestion(BigQueryConnection.of(getBigQueryConnection()), datasets).get(0); Map actualStats = result.statisticByName(); @@ -430,7 +430,7 @@ protected IngestorResult executePlansAndVerifyResults(IngestMode ingestMode, Pla .enableSchemaEvolution(options.enableSchemaEvolution()) .schemaEvolutionCapabilitySet(userCapabilitySet) .build(); - IngestorResult result = ingestor.performFullIngestion(BigQueryConnection.of(getBigQueryConnection()), datasets); + IngestorResult result = ingestor.performFullIngestion(BigQueryConnection.of(getBigQueryConnection()), datasets).get(0); Map actualStats = result.statisticByName(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java index a8efc6b0c04..559c2a0f5db 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java @@ -120,7 +120,7 @@ public void testMilestoning() throws IOException, InterruptedException .build(); RelationalConnection connection = BigQueryConnection.of(getBigQueryConnection()); - IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets); + IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets).get(0); // Verify List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); @@ -178,7 +178,7 @@ public void testMilestoningFailure() throws IOException, InterruptedException .build(); RelationalConnection connection = BigQueryConnection.of(getBigQueryConnection()); - IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets); + IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets).get(0); // Verify List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index 7cbf3000627..d0995b05d59 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -31,7 +31,6 @@ public class AppendOnlyTest extends org.finos.legend.engine.persistence.componen String rowsUpdated = "SELECT 0 as `rowsUpdated`"; String rowsTerminated = "SELECT 0 as `rowsTerminated`"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`staging` as stage"; @Override public RelationalSink getRelationalSink() @@ -40,7 +39,7 @@ public RelationalSink getRelationalSink() } @Override - public void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations) + public void verifyAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecordsDeriveMainSchema(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -52,45 +51,37 @@ public void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); // Stats - verifyStats(operations); + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertNull(operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); } - 
@Override - public void verifyAppendOnlyAllowDuplicatesWithAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + - "FROM `mydb`.`staging` as stage)"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableCreateQueryWithAuditAndNoPKs, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); - } @Override - public void verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(List generatorResults, List dataSplitRanges) + public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords(List generatorResults, List dataSplitRanges) { String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + - "FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; 
Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit, generatorResults.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, generatorResults.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates, generatorResults.get(0).deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), generatorResults.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), generatorResults.get(1).ingestSql().get(0)); Assertions.assertEquals(2, generatorResults.size()); // Stats - String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; 
Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -101,72 +92,23 @@ public void verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage)"; - - Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyFailOnDuplicatesWithAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + - "FROM `mydb`.`staging` as stage)"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditNotPKCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyFilterDuplicatesNoAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`) " + - "(SELECT * 
FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + - "(sink.`digest` = stage.`digest`))))"; - - Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); - Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); - Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); - Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); - } - - @Override - public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries) + public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRecords(GeneratorResult queries) { List preActionsSqlList = queries.preActionsSql(); List milestoningSqlList = queries.ingestSql(); + List deduplicationAndVersioningSql = queries.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + - "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM 
`mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); List postActionsSql = queries.postActionsSql(); @@ -175,6 +117,7 @@ public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries assertIfListsAreSameIgnoringOrder(expectedSQL, postActionsSql); // Stats + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; Assertions.assertEquals(incomingRecordCount, queries.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, queries.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); @@ -184,26 +127,30 @@ public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries } @Override - public void verifyAppendOnlyFilterDuplicatesWithAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExistingRecords(List operations, List dataSplitRanges) { String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + - "FROM `mydb`.`staging` as stage " + - "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + - "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + - "(sink.`digest` = stage.`digest`)))))"; + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + + "(sink.`digest` = stage.`digest`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit, operations.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, operations.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates, operations.get(0).deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); 
Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -220,14 +167,13 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); - String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`) " + - "(SELECT * FROM `MYDB`.`STAGING` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink " + - "WHERE ((sink.`ID` = stage.`ID`) " + - "AND (sink.`NAME` = stage.`NAME`)) " + - "AND (sink.`DIGEST` = stage.`DIGEST`))))"; + String insertSql = "INSERT INTO `MYDB`.`MAIN` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_UPDATE_TIME`) " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "WHERE NOT (EXISTS " + + "(SELECT * FROM `MYDB`.`MAIN` as sink WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = 
stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestCreateQueryWithUpperCase, preActionsSqlList.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQueryUpperCase, preActionsSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); } @@ -237,23 +183,71 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + - "(sink.`digest` = stage.`digest`))))"; + String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); } - private void verifyStats(GeneratorResult operations) + @Override + public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecords(GeneratorResult operations) { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = 
operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + + String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; + String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); 
Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + } + + @Override + public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + + String insertSql = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; + String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); Assertions.assertEquals(rowsDeleted, 
operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java index 34139376753..51f7e0eec10 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java @@ -53,6 +53,30 @@ public class BigQueryTestArtifacts "`digest` STRING," + "PRIMARY KEY (`id`, `name`) NOT ENFORCED)"; + public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INT64 NOT NULL," + + "`name` STRING NOT NULL," + + "`amount` FLOAT64," + + "`biz_date` DATE," + + "`legend_persistence_count` INT64)"; + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INT64 NOT NULL," + + "`name` STRING NOT NULL," + + "`amount` FLOAT64," + + "`biz_date` DATE," + + "`digest` STRING," + + "`legend_persistence_count` 
INT64)"; + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INT64 NOT NULL," + + "`name` STRING NOT NULL," + + "`amount` FLOAT64," + + "`biz_date` DATE," + + "`digest` STRING," + + "`legend_persistence_count` INT64," + + "`data_split` INT64 NOT NULL)"; + public static String expectedBaseTablePlusDigestPlusVersionCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + @@ -113,6 +137,15 @@ public class BigQueryTestArtifacts "`batch_update_time` DATETIME NOT NULL," + "PRIMARY KEY (`id`, `name`, `batch_update_time`) NOT ENFORCED)"; + public static String expectedBaseTablePlusDigestPlusUpdateTimestampCreateQueryUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + + "`ID` INT64 NOT NULL," + + "`NAME` STRING NOT NULL," + + "`AMOUNT` FLOAT64," + + "`BIZ_DATE` DATE," + + "`DIGEST` STRING," + + "`BATCH_UPDATE_TIME` DATETIME NOT NULL," + + "PRIMARY KEY (`ID`, `NAME`, `BATCH_UPDATE_TIME`) NOT ENFORCED)"; + public static String expectedBaseTableWithAuditNotPKCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + @@ -132,6 +165,8 @@ public class BigQueryTestArtifacts public static String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage WHERE 1 = 1"; + public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE 1 = 1"; + public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging` CASCADE"; public static String cleanUpMainTableSql = "DELETE FROM `mydb`.`main` as sink WHERE 1 = 1"; @@ -162,16 +197,16 @@ public class BigQueryTestArtifacts "`BATCH_ID_IN` INT64 NOT NULL,`BATCH_ID_OUT` INT64,PRIMARY KEY (`ID`, `NAME`, `BATCH_ID_IN`) NOT ENFORCED)"; public static String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata 
(`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`)" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'DONE')"; public static String expectedMetadataTableIngestWithStagingFiltersQuery = "INSERT INTO batch_metadata " + "(`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `staging_filters`) " + "(SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')," + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')," + "CURRENT_DATETIME(),'DONE',PARSE_JSON('{\"batch_id_in\":{\"GT\":5}}'))"; public static String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`)" + - " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'DONE')"; + " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'DONE')"; public static String 
expectedMetadataTableIngestQueryWithPlaceHolders = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) " + "(SELECT 'main',{BATCH_ID_PATTERN},PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TS_PATTERN}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_END_TS_PATTERN}'),'DONE')"; @@ -228,11 +263,12 @@ public class BigQueryTestArtifacts "`digest` STRING," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`) NOT ENFORCED)"; - public static String expectedBitemporalMainTableWithBatchIdDatetimeCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalMainTableWithVersionWithBatchIdDatetimeCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + "`digest` STRING," + + "`version` INT64," + "`batch_id_in` INT64 NOT NULL," + "`batch_id_out` INT64," + "`batch_time_in` DATETIME," + @@ -241,11 +277,12 @@ public class BigQueryTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`) NOT ENFORCED)"; - public static String expectedBitemporalMainTableWithDatetimeCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalMainTableWithVersionBatchDateTimeCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + "`digest` STRING," + + "`version` INT64," + "`batch_time_in` DATETIME NOT NULL," + "`batch_time_out` DATETIME," + "`validity_from_target` DATETIME NOT NULL," + @@ -263,6 +300,18 @@ public class BigQueryTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`) NOT ENFORCED)"; + public static String expectedBitemporalFromOnlyMainTableWithVersionCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + + "(`id` INT64 NOT NULL," + + "`name` STRING NOT 
NULL," + + "`amount` FLOAT64," + + "`digest` STRING," + + "`version` INT64," + + "`batch_id_in` INT64 NOT NULL," + + "`batch_id_out` INT64," + + "`validity_from_target` DATETIME NOT NULL," + + "`validity_through_target` DATETIME," + + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`) NOT ENFORCED)"; + public static String expectedBitemporalFromOnlyStagingTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`staging`(" + "`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + @@ -317,6 +366,18 @@ public class BigQueryTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`) NOT ENFORCED)"; + public static String expectedBitemporalFromOnlyTempTableWithVersionCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`temp`" + + "(`id` INT64 NOT NULL," + + "`name` STRING NOT NULL," + + "`amount` FLOAT64," + + "`digest` STRING," + + "`version` INT64," + + "`batch_id_in` INT64 NOT NULL," + + "`batch_id_out` INT64," + + "`validity_from_target` DATETIME NOT NULL," + + "`validity_through_target` DATETIME," + + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`) NOT ENFORCED)"; + public static String expectedBitemporalFromOnlyTempTableBatchIdAndTimeBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`temp`(" + "`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + @@ -361,12 +422,13 @@ public class BigQueryTestArtifacts "`delete_indicator` STRING," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`) NOT ENFORCED)"; - public static String expectedBitemporalFromOnlyStageWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + + public static String expectedBitemporalFromOnlyStageWithVersionWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + "`amount` FLOAT64," + "`validity_from_reference` DATETIME NOT NULL," 
+ "`digest` STRING," + + "`version` INT64," + "`data_split` INT64 NOT NULL," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`, `data_split`) NOT ENFORCED)"; @@ -379,4 +441,60 @@ public class BigQueryTestArtifacts "`delete_indicator` STRING," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`) NOT ENFORCED)"; + public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`," + + "DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,COUNT(*) as `legend_persistence_count` " + + "FROM `mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage) " + + "as stage WHERE stage.`legend_persistence_rank` = 1)"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`)"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + + "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM " + + "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " + + "WHERE stage.`legend_persistence_rank` = 1)"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, `data_split`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; + + public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage"; + + public static String dataErrorCheckSql = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, 
`biz_date`) as stage"; + + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE 1 = 1"; + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `LEGEND_PERSISTENCE_COUNT`) " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT` " + + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + + "stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT`," + + "DENSE_RANK() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`BIZ_DATE` DESC) as `LEGEND_PERSISTENCE_RANK` " + + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,COUNT(*) as `LEGEND_PERSISTENCE_COUNT` " + + "FROM `MYDB`.`STAGING` as stage GROUP BY stage.`ID`, stage.`NAME`, stage.`AMOUNT`, stage.`BIZ_DATE`, stage.`DIGEST`) as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_RANK` = 1)"; + public static String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + + "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as 
`legend_persistence_rank` " + + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java index 8b4ea1678a9..cc61379a451 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java @@ -71,24 +71,24 @@ public void verifyBitemporalDeltaBatchIdDateTimeBasedNoDeleteIndWithDataSplits(L { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + "((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= 
'{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`validity_from_target` = stage.`validity_from_reference`) AND (sink.`digest` <> stage.`digest`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `validity_from_target`, " + - "`validity_through_target`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + + "`validity_through_target`, `digest`, `version`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`validity_through_reference`," + - "stage.`digest`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "stage.`digest`,stage.`version`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE " + "(sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`validity_from_target` = stage.`validity_from_reference`)))) AND " + "((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalMainTableWithBatchIdDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); + 
Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalMainTableWithVersionWithBatchIdDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedMilestoneQuery, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); @@ -153,7 +153,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND " + @@ -162,10 +162,10 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `validity_from_target`, `validity_through_target`, `digest`, " + + "(`id`, `name`, `amount`, `validity_from_target`, `validity_through_target`, `digest`, `version`, " + "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`," + - "stage.`validity_through_reference`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')," + + "stage.`validity_through_reference`,stage.`digest`,stage.`version`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')," + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM 
`mydb`.`staging` as stage WHERE " + "((NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + "AND (sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + @@ -173,7 +173,7 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) AND " + "(stage.`delete_indicator` NOT IN ('yes','1','true')))"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalMainTableWithDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalMainTableWithVersionBatchDateTimeCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedMilestoneQuery, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); @@ -185,10 +185,10 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`data_split` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`data_split` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -132,10 +134,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`delete_indicator`,stage.`data_split` FROM `mydb`.`staging` as stage WHERE (stage.`delete_indicator` NOT IN ('yes','1','true')) AND ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= 
'{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -354,10 +360,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + "WHERE legend_persistence_x.`delete_indicator` = 0 " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_y.`validity_through_target` > legend_persistence_x.`legend_persistence_start_date`) AND (legend_persistence_y.`validity_through_target` <= legend_persistence_x.`legend_persistence_end_date`) AND (legend_persistence_y.`delete_indicator` <> 0) " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; + "GROUP BY legend_persistence_x.`id`, 
legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery, operations.get(0).preActionsSql().get(3)); @@ -446,6 +452,9 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedStageToStageWithoutDuplicates = "INSERT INTO `mydb`.`stagingWithoutDuplicates` " + - "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `data_split`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`data_split` FROM `mydb`.`staging` as stage " + + "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `version`, `data_split`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`data_split` FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`digest` = stage.`digest`) AND (sink.`batch_id_out` = 999999999))))"; String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`data_split` FROM `mydb`.`stagingWithoutDuplicates` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`data_split` FROM `mydb`.`stagingWithoutDuplicates` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -566,10 
+575,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_x.`validity_from_reference` = legend_persistence_y.`legend_persistence_start_date`))"; String expectedMainToTemp = "INSERT INTO `mydb`.`temp` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999) as legend_persistence_x " + + "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`version`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE 
sink.`batch_id_out` = 999999999) as legend_persistence_x " + "INNER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,legend_persistence_x.`legend_persistence_end_date` as `legend_persistence_end_date` " + "FROM " + @@ -594,13 +603,13 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl "AND (sink.`batch_id_out` = 999999999)"; String expectedTempToMain = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + - "(SELECT temp.`id`,temp.`name`,temp.`amount`,temp.`digest`,temp.`batch_id_in`,temp.`batch_id_out`,temp.`validity_from_target`,temp.`validity_through_target` FROM `mydb`.`temp` as temp)"; + "(`id`, `name`, `amount`, `digest`, `version`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + + "(SELECT temp.`id`,temp.`name`,temp.`amount`,temp.`digest`,temp.`version`,temp.`batch_id_in`,temp.`batch_id_out`,temp.`validity_from_target`,temp.`validity_through_target` FROM `mydb`.`temp` as temp)"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); - Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyStageWithDataSplitWithoutDuplicatesTableCreateQuery, operations.get(0).preActionsSql().get(3)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyTempTableWithVersionCreateQuery, 
operations.get(0).preActionsSql().get(2)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyStageWithVersionWithDataSplitWithoutDuplicatesTableCreateQuery, operations.get(0).preActionsSql().get(3)); Assertions.assertEquals(expectedStageToStageWithoutDuplicates, operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedStageToTemp, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(1)); @@ -620,6 +629,9 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(0).metadataIngestSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`temp`"), operations.get(0).postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`stagingWithoutDuplicates`"), operations.get(0).postCleanupSql().get(1)); + Assertions.assertEquals(2, operations.size()); String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; verifyStats(operations.get(0), enrichSqlWithDataSplits(incomingRecordCount,dataSplitRanges.get(0)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); @@ -763,6 +775,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`name` STRING NOT NULL," + "`amount` FLOAT64," + "`digest` STRING," + + "`version` INT64," + "`batch_id_in` INT64 NOT NULL," + "`batch_id_out` INT64," + "`validity_from_target` DATETIME NOT NULL," + @@ -774,6 +787,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`name` STRING NOT NULL," + "`amount` FLOAT64," + "`digest` STRING," + + "`version` INT64," + "`batch_id_in` INT64 NOT NULL," + "`batch_id_out` INT64," + "`validity_from_target` DATETIME NOT NULL," + @@ -787,20 +801,21 @@ public void 
verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`amount` FLOAT64," + "`validity_from_reference` DATETIME NOT NULL," + "`digest` STRING," + + "`version` INT64," + "`delete_indicator` STRING," + "`data_split` INT64 NOT NULL," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`, `data_split`) NOT ENFORCED)"; String expectedStageToStageWithoutDuplicates = "INSERT INTO " + stageWithoutDuplicatesName + " " + - "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `delete_indicator`, `data_split`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`delete_indicator`,stage.`data_split` FROM `mydb`.`staging` as stage " + + "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `version`, `delete_indicator`, `data_split`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`delete_indicator`,stage.`data_split` FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`digest` = stage.`digest`) AND (sink.`batch_id_out` = 999999999))))"; String expectedStageToTemp = "INSERT INTO " + tempName + " " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT legend_persistence_stageWithoutDuplicates.`id`,legend_persistence_stageWithoutDuplicates.`name`,legend_persistence_stageWithoutDuplicates.`amount`,legend_persistence_stageWithoutDuplicates.`validity_from_reference`,legend_persistence_stageWithoutDuplicates.`digest`,legend_persistence_stageWithoutDuplicates.`delete_indicator`,legend_persistence_stageWithoutDuplicates.`data_split` FROM " + stageWithoutDuplicatesName + " as legend_persistence_stageWithoutDuplicates WHERE (legend_persistence_stageWithoutDuplicates.`delete_indicator` NOT IN ('yes','1','true')) AND ((legend_persistence_stageWithoutDuplicates.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (legend_persistence_stageWithoutDuplicates.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + + "(SELECT legend_persistence_stageWithoutDuplicates.`id`,legend_persistence_stageWithoutDuplicates.`name`,legend_persistence_stageWithoutDuplicates.`amount`,legend_persistence_stageWithoutDuplicates.`validity_from_reference`,legend_persistence_stageWithoutDuplicates.`digest`,legend_persistence_stageWithoutDuplicates.`version`,legend_persistence_stageWithoutDuplicates.`delete_indicator`,legend_persistence_stageWithoutDuplicates.`data_split` FROM " + stageWithoutDuplicatesName + " as legend_persistence_stageWithoutDuplicates WHERE (legend_persistence_stageWithoutDuplicates.`delete_indicator` NOT IN ('yes','1','true')) AND ((legend_persistence_stageWithoutDuplicates.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND 
(legend_persistence_stageWithoutDuplicates.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -818,10 +833,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_x.`validity_from_reference` = legend_persistence_y.`legend_persistence_start_date`))"; String expectedMainToTemp = "INSERT INTO " + tempName + " " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT 
sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999) as legend_persistence_x " + + "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`version`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999) as legend_persistence_x " + "INNER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,legend_persistence_x.`legend_persistence_end_date` as `legend_persistence_end_date` " + "FROM " + @@ -846,12 +861,12 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "AND (sink.`batch_id_out` = 999999999)"; String expectedTempToMain = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + - "(SELECT legend_persistence_temp.`id`,legend_persistence_temp.`name`,legend_persistence_temp.`amount`,legend_persistence_temp.`digest`,legend_persistence_temp.`batch_id_in`,legend_persistence_temp.`batch_id_out`,legend_persistence_temp.`validity_from_target`,legend_persistence_temp.`validity_through_target` FROM " + tempName + " as legend_persistence_temp)"; + "(`id`, `name`, `amount`, `digest`, `version`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + + "(SELECT legend_persistence_temp.`id`,legend_persistence_temp.`name`,legend_persistence_temp.`amount`,legend_persistence_temp.`digest`,legend_persistence_temp.`version`,legend_persistence_temp.`batch_id_in`,legend_persistence_temp.`batch_id_out`,legend_persistence_temp.`validity_from_target`,legend_persistence_temp.`validity_through_target` FROM " + tempName + " as legend_persistence_temp)"; String expectedMainToTempForDeletion 
= "INSERT INTO " + tempWithDeleteIndicatorName + " " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `delete_indicator`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_x.`validity_through_target` as `legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,(CASE WHEN legend_persistence_y.`delete_indicator` IS NULL THEN 0 ELSE 1 END) " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `delete_indicator`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_x.`validity_through_target` as `legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,(CASE WHEN legend_persistence_y.`delete_indicator` IS NULL THEN 0 ELSE 1 END) " + "FROM " + "(SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " + "AND (EXISTS " + @@ -871,19 +886,19 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "AND (sink.`batch_id_out` = 999999999)"; String expectedTempToMainForDeletion = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`legend_persistence_start_date` as `legend_persistence_start_date`,MAX(legend_persistence_y.`validity_through_target`) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`legend_persistence_start_date` as `legend_persistence_start_date`,MAX(legend_persistence_y.`validity_through_target`) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` FROM " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + "FROM " + tempWithDeleteIndicatorName + " as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.`id` = 
legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_y.`validity_from_target` > legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + "WHERE legend_persistence_x.`delete_indicator` = 0 " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_y.`validity_through_target` > legend_persistence_x.`legend_persistence_start_date`) AND (legend_persistence_y.`validity_through_target` <= legend_persistence_x.`legend_persistence_end_date`) AND (legend_persistence_y.`delete_indicator` <> 0) " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; - 
Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery, operations.get(0).preActionsSql().get(3)); @@ -915,6 +930,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(0).metadataIngestSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`main_legend_persistence_temp`"), operations.get(0).postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`main_legend_persistence_tempWithDeleteIndicator`"), operations.get(0).postCleanupSql().get(1)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`staging_legend_persistence_stageWithoutDuplicates`"), operations.get(0).postCleanupSql().get(2)); + Assertions.assertEquals(2, operations.size()); String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`) AND 
(sink2.`validity_from_target` = sink.`validity_from_target`)) AND (sink2.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))))"; @@ -1005,7 +1024,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`," + - "legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1027,7 +1046,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedMainToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM " + "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`batch_time_in`," + "sink.`batch_time_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink " + @@ -1051,7 +1070,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedUpdateMain = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (EXISTS " + "(SELECT * FROM `mydb`.`temp` as temp WHERE ((sink.`id` = temp.`id`) AND (sink.`name` = temp.`name`)) " + "AND (sink.`validity_from_target` = temp.`validity_from_target`))) AND (sink.`batch_id_out` = 999999999)"; @@ -1086,7 +1105,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, 
`name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`," + - "legend_persistence_y.`legend_persistence_end_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "legend_persistence_y.`legend_persistence_end_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1109,7 +1128,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`," + "legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM (SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_time_in`," + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM (SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_time_in`," + "sink.`batch_time_out`,sink.`validity_from_target`,sink.`validity_through_target` " + "FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as 
legend_persistence_x " + "INNER JOIN " + @@ -1132,7 +1151,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "AND (legend_persistence_x.`validity_from_target` = legend_persistence_y.`legend_persistence_start_date`))"; String expectedUpdateMain = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (EXISTS (SELECT * FROM `mydb`.`temp` as temp WHERE " + "((sink.`id` = temp.`id`) AND (sink.`name` = temp.`name`)) AND " + "(sink.`validity_from_target` = temp.`validity_from_target`))) AND (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))"; @@ -1154,8 +1173,8 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) as `rowsInserted`"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = 
PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index c59096a0c3e..4d8899447e4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -137,11 +137,11 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "FROM `my_db`.`my_name_legend_persistence_temp` as 
legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT {NEXT_BATCH_ID},'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"xyz123\"}'))"; + "(SELECT {NEXT_BATCH_ID},'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"xyz123\"}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); @@ -151,7 +151,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test @@ -209,11 +209,11 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoTaskId() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + - "(SELECT 
legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + "PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -224,7 +224,7 @@ public void 
testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoTaskId() Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test @@ -330,7 +330,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `digest`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d 
%H:%M:%S','2000-01-01 00:00:00.000000') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -340,7 +340,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test @@ -389,7 +389,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() String expectedInsertSql = "INSERT INTO `MY_DB`.`MY_NAME` " + "(`COL_INT`, `COL_STRING`, `COL_DECIMAL`, `COL_DATETIME`, `COL_VARIANT`, `DIGEST`, `BATCH_ID`, `APPEND_TIME`) " + - "(SELECT legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.`TABLE_NAME`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT 
legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.`TABLE_NAME`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "FROM `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -399,7 +399,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() Assertions.assertEquals("SELECT 0 as `ROWSDELETED`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `ROWSTERMINATED`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `ROWSUPDATED`", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `ROWSINSERTED` FROM `MY_DB`.`MY_NAME` as my_alias WHERE my_alias.`APPEND_TIME` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `ROWSINSERTED` FROM `MY_DB`.`MY_NAME` as my_alias WHERE my_alias.`APPEND_TIME` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java index 1e8dbb810ae..adf83659f9f 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java @@ -54,7 +54,7 @@ public class IngestModeTest String[] partitionKeys = new String[] {"biz_date"}; HashMap> partitionFilter = new HashMap>() {{ - put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00", "2000-01-02 00:00:00"))); + put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00.000000", "2000-01-02 00:00:00"))); }}; // Base Columns: Primary keys : id, name @@ -167,9 +167,9 @@ public class IngestModeTest "`BATCH_STATUS` VARCHAR(32)," + "`TABLE_BATCH_ID` INTEGER)"; - protected String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_TIMESTAMP(),'DONE')"; + protected String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_TIMESTAMP(),'DONE')"; - protected String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 
'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.`TABLE_NAME` = 'main'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_TIMESTAMP(),'DONE')"; + protected String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.`TABLE_NAME` = 'main'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_TIMESTAMP(),'DONE')"; String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index 09fcb7745c0..667cb8c5ccb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -31,9 +31,15 @@ public class NontemporalDeltaTest extends org.finos.legend.engine.persistence.co protected String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; protected String incomingRecordCountWithSplits = "SELECT COUNT(*) 
as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + protected String incomingRecordCountWithSplitsTempStaginTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + + protected String incomingRecordCountWithSplitsAndDuplicates = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + protected String rowsTerminated = "SELECT 0 as `rowsTerminated`"; protected String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - protected String rowsDeletedWithDeleteIndicator = "SELECT COUNT(*) as `rowsDeleted` FROM `mydb`.`main` as sink WHERE EXISTS (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`) AND (stage.`delete_indicator` IN ('yes','1','true')))"; + protected String rowsDeletedWithDeleteIndicator = "SELECT COUNT(*) as `rowsDeleted` FROM `mydb`.`main` as sink WHERE EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`) AND (stage.`delete_indicator` IN ('yes','1','true')))"; @Override @@ -43,7 +49,7 @@ public RelationalSink getRelationalSink() } @Override - public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(GeneratorResult operations) { List 
preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -73,13 +79,13 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio } @Override - public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String mergeSql = "MERGE INTO `mydb`.`main` as sink " + - "USING `mydb`.`staging` as stage " + + "USING `mydb`.`staging_legend_persistence_temp_staging` as stage " + "ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + "THEN UPDATE SET " + @@ -88,10 +94,10 @@ public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operat "sink.`amount` = stage.`amount`," + "sink.`biz_date` = stage.`biz_date`," + "sink.`digest` = stage.`digest`," + - "sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHEN NOT MATCHED THEN INSERT " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))"; + "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(mergeSql, milestoningSqlList.get(0)); @@ -103,7 +109,31 @@ public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operat } @Override - public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List operations, 
List dataSplitRanges) + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) + { + String mergeSql = "MERGE INTO `mydb`.`main` as sink " + + "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + + "as stage ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + + "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest` " + + "WHEN NOT MATCHED " + + "THEN INSERT (`id`, `name`, `amount`, `biz_date`, `digest`) " + + "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`)"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); + + // Stats + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempStaginTable, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempStaginTable, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, 
operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + + @Override + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO `mydb`.`main` as sink " + "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage " + @@ -127,31 +157,31 @@ public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List } @Override - public void verifyNonTemporalDeltaWithWithAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO `mydb`.`main` as sink " + - "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage " + + "USING (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + - "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHEN NOT MATCHED " + "THEN INSERT (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "VALUES 
(stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))"; + "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); // Stats - Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); - Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsAndDuplicates, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsAndDuplicates, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsTerminated, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); Assertions.assertEquals(rowsDeleted, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); } @Override - public void verifyNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator(GeneratorResult operations) + public void verifyNontemporalDeltaNoAuditingWithDeleteIndicatorNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = 
operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -250,15 +280,14 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult } @Override - public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(GeneratorResult operations) + public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String mergeSql = "MERGE INTO `mydb`.`main` as sink " + "USING " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`,ROW_NUMBER() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_row_num` FROM `mydb`.`staging` as stage WHERE stage.`snapshot_id` > 18972) as stage WHERE stage.`legend_persistence_row_num` = 1) as stage " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage " + "ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND stage.`version` > sink.`version` " + "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`version` = stage.`version` " + @@ -275,7 +304,7 @@ public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(Ge } @Override - public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -299,7 +328,7 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(Gene } @Override - public void 
verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaNoDedupMaxVersionWithoutPerform(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -322,15 +351,14 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( } @Override - public void verifyNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String mergeSql = "MERGE INTO `MYDB`.`MAIN` as sink " + "USING " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` FROM " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`,ROW_NUMBER() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`VERSION` DESC) as `LEGEND_PERSISTENCE_ROW_NUM` FROM `MYDB`.`STAGING` as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_ROW_NUM` = 1) as stage " + + "`MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + "ON (sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`) " + "WHEN MATCHED AND stage.`VERSION` >= sink.`VERSION` " + "THEN UPDATE SET sink.`ID` = stage.`ID`,sink.`NAME` = stage.`NAME`,sink.`AMOUNT` = stage.`AMOUNT`,sink.`BIZ_DATE` = stage.`BIZ_DATE`,sink.`DIGEST` = stage.`DIGEST`,sink.`VERSION` = stage.`VERSION` " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java index e56b89495a9..1636e9de80d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java @@ -28,19 +28,18 @@ public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { String rowsDeleted = "SELECT COUNT(*) as `rowsDeleted` FROM `mydb`.`main` as sink"; - String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; String rowsUpdated = "SELECT 0 as `rowsUpdated`"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink"; String rowsTerminated = "SELECT 0 as `rowsTerminated`"; @Override - public void verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotNoAuditingNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`) " + - "(SELECT * FROM `mydb`.`staging` as stage)"; + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date` FROM `mydb`.`staging` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedStagingTableCreateQuery, preActionsSqlList.get(1)); @@ -48,66 +47,51 @@ public void 
verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult opera Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override - public void verifyNontemporalSnapshotNoAuditingWithDataSplit(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date` FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`staging` as stage_right WHERE " + - "(stage.`data_split` < stage_right.`data_split`) AND ((stage.`id` = stage_right.`id`) AND (stage.`name` = stage_right.`name`)))))"; - - Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(BigQueryTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyNontemporalSnapshotWithAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + - "FROM `mydb`.`staging` as stage)"; + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; 
Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override - public void verifyNontemporalSnapshotWithAuditingWithDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); - String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + - "FROM `mydb`.`staging` as stage WHERE NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging` as stage_right " + - "WHERE (stage.`data_split` < stage_right.`data_split`) AND ((stage.`id` = stage_right.`id`) AND " + - "(stage.`name` = stage_right.`name`)))))"; + String insertSql = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`," + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithCount, preActionsSqlList.get(1)); Assertions.assertEquals(BigQueryTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + 
Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override @@ -117,7 +101,7 @@ public void verifyNontemporalSnapshotWithUpperCaseOptimizer(GeneratorResult quer List milestoningSqlList = queries.ingestSql(); String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`) " + - "(SELECT * FROM `MYDB`.`STAGING` as stage)"; + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE` FROM `MYDB`.`STAGING` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableCreateQueryWithUpperCase, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.cleanupMainTableSqlUpperCase, milestoningSqlList.get(0)); @@ -131,7 +115,7 @@ public void verifyNontemporalSnapshotWithLessColumnsInStaging(GeneratorResult op List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`) " + - "(SELECT * FROM `mydb`.`staging` as stage)"; + "(SELECT stage.`id`,stage.`name`,stage.`amount` FROM `mydb`.`staging` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); @@ -148,9 +132,9 @@ public void verifyNontemporalSnapshotWithCleanStagingData(GeneratorResult operat } @Override - public void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostActions) + public void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostCleanup) { - List sqlsForPostActions = physicalPlanForPostActions.getSqlList(); + List sqlsForPostActions = physicalPlanForPostCleanup.getSqlList(); List expectedSQL = new ArrayList<>(); 
expectedSQL.add(BigQueryTestArtifacts.expectedDropTableQuery); assertIfListsAreSameIgnoringOrder(expectedSQL, sqlsForPostActions); @@ -162,12 +146,13 @@ public RelationalSink getRelationalSink() return BigQuerySink.get(); } - private void verifyStats(GeneratorResult operations) + private void verifyStats(GeneratorResult operations, String stageTableName) { // Pre stats: Assertions.assertEquals(rowsDeleted, operations.preIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); // Post Stats: + String incomingRecordCount = String.format("SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`%s` as stage", stageTableName); Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 18063de520c..7290a5e44eb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -26,7 
+26,7 @@ public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -67,7 +67,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupAllVersionsWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + @@ -108,7 +108,7 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -118,7 +118,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -126,7 +126,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999 FROM `mydb`.`staging` as stage " + + "999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -147,13 +147,13 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -161,7 +161,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ 
-179,7 +179,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -257,7 +257,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFilters(G } @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFiltersIncludesNullValues(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndWithOptimizationFiltersIncludesNullValues(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -317,35 +317,24 @@ public void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult } @Override - public void verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(GeneratorResult operations) + public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 " + - "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + - "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS " + - "(SELECT * FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` " + - "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`,ROW_NUMBER() " + - "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_row_num` " + - "FROM `mydb`.`staging` as stage WHERE stage.`batch_id_in` > 5) as stage " + - "WHERE stage.`legend_persistence_row_num` = 1) as stage " + + "SET sink.`batch_id_out` = (SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + + "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))"; - String expectedUpsertQuery = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, " + - "`digest`, `version`, `batch_id_in`, `batch_id_out`) " + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 " + - "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + - "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` " + - "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + - "ROW_NUMBER() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) " + - "as `legend_persistence_row_num` FROM `mydb`.`staging` as stage WHERE stage.`batch_id_in` > 5) as stage " + - "WHERE stage.`legend_persistence_row_num` = 1) as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE (sink.`batch_id_out` = 999999999) AND (stage.`version` <= sink.`version`) " + - "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` " + + "as stage WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE 
(sink.`batch_id_out` = 999999999) " + + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); @@ -356,7 +345,7 @@ public void verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(Gen } @Override - public void verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(GeneratorResult operations) + public void verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -393,7 +382,7 @@ public void verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(Generato } @Override - public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations) + public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -401,7 +390,7 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(stage.`version` > sink.`version`)))"; @@ -410,7 +399,7 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999 " + - "FROM `mydb`.`staging` as stage " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -424,7 +413,7 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( } @Override - public void verifyUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations) + public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -433,22 +422,16 @@ public void verifyUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWitho "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA " + "as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1 " + "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + - "(EXISTS (SELECT * FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` " + - "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + - "ROW_NUMBER() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`VERSION` DESC) " + - "as `LEGEND_PERSISTENCE_ROW_NUM` FROM `MYDB`.`STAGING` as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_ROW_NUM` = 1) as stage " + + "(EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + "WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= 
sink.`VERSION`)))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " + + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` " + - "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + - "ROW_NUMBER() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`VERSION` DESC) as `LEGEND_PERSISTENCE_ROW_NUM` " + - "FROM `MYDB`.`STAGING` as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_ROW_NUM` = 1) as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) " + - "AND (stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + + "(stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 018e7800bcc..9cc6944a77d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -27,7 +27,7 @@ public class UnitemporalDeltaBatchIdDateTimeBasedTest extends UnitmemporalDeltaBatchIdDateTimeBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -35,7 +35,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE 
(sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -45,7 +45,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -68,13 +68,13 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " 
+ "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -82,8 +82,8 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -101,7 +101,7 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -119,7 +119,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -131,7 +131,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 
00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -154,7 +154,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator } @Override - public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalDeltaWithDeleteInd(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -162,7 +162,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -174,7 +174,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d 
%H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -189,13 +189,13 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + "((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -203,7 +203,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + @@ -236,8 +236,8 @@ public void 
verifyUnitemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; + String 
expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); @@ -254,7 +254,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`)))"; @@ -263,7 +263,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, 
`batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ -329,7 +329,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) String expectedMilestoneQuery = "UPDATE `my_schema`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `my_schema`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -339,7 +339,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " 
+ + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `my_schema`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `my_schema`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -374,7 +374,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati String expectedMilestoneQuery = "UPDATE `mydb`.`my_schema`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`my_schema`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -384,7 +384,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`my_schema`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`my_schema`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -419,7 +419,7 @@ public void 
verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper String expectedMilestoneQuery = "UPDATE main as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM staging as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -429,7 +429,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM staging as stage " + "WHERE NOT (EXISTS (SELECT * FROM main as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java index a2a6cb0fb30..cb6962a0515 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java @@ -26,14 +26,14 @@ public class UnitemporalDeltaDateTimeBasedTest extends UnitmemporalDeltaDateTimeBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -42,7 +42,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + @@ -57,29 +57,29 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio // Stats String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) as `rowsInserted`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; String rowsTerminated = "SELECT 0 as `rowsTerminated`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @Override - public void 
verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + - "FROM `mydb`.`staging` as stage " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + @@ -97,23 +97,23 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = 
operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE " + "(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -124,7 +124,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(`id`, `name`, `amount`, `biz_date`, `digest`, " + "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -139,21 +139,21 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper // Stats String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))))"; + String rowsUpdated = "SELECT COUNT(*) as 
`rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))))) as `rowsInserted`"; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))))) as `rowsTerminated`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as 
`rowsInserted`"; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as `rowsTerminated`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE " + "(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -161,7 +161,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = 
PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " + @@ -180,11 +180,11 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 
00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java index 219547c5ad1..2d36914369f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import 
org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -21,6 +22,7 @@ import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBatchIdBasedTestCases { @@ -31,7 +33,7 @@ public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBat String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1)-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))))) as `rowsTerminated`"; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -60,6 +62,41 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List 
deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + + "WHERE (sink.`batch_id_out` = 999999999) " + + "AND (NOT (EXISTS " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedStagingTableWithDigestCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(2)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, 
deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + } + @Override public void verifyUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling(GeneratorResult operations) { @@ -90,7 +127,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -120,7 +157,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 31fa65fd32a..6e709c287b3 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -21,6 +22,7 @@ import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSnapshotBatchIdDateTimeBasedTestCases { @@ -31,14 +33,14 @@ public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSna String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1)-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE 
UPPER(batch_metadata.`table_name`) = 'MAIN'))))) as `rowsTerminated`"; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -46,7 +48,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata 
WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; @@ -60,6 +62,42 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_id_out` = 999999999) " + + "AND (NOT (EXISTS " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE 
UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + } + @Override public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations) { @@ -67,7 +105,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBa List milestoningSql = operations.ingestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "SET 
sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE sink.`batch_id_out` = 999999999"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -76,31 +114,47 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBa } @Override - public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink " + + "SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + + "AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT 
COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 " + + "FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999," + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink WHERE sink.`BATCH_ID_OUT` = 999999999)))"; - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink WHERE sink.`BATCH_ID_OUT` = 999999999)))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); + 
Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithUpperCase(), metadataIngestSql.get(0)); } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -109,7 +163,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult 
o String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND (sink.`biz_date` = stage.`biz_date`))))"; @@ -136,14 +190,14 @@ public void verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(G } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE 
UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -152,7 +206,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; @@ -172,7 +226,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmp List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 
00:00:00') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; @@ -198,7 +252,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -206,7 +260,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + 
+ "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index 1986c36015b..8e748b0bb41 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -21,25 +22,26 @@ import org.junit.jupiter.api.Assertions; import java.util.List; 
+import java.util.Map; public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))))"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))))) as `rowsInserted`"; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) 
AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'))))) as `rowsTerminated`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as `rowsInserted`"; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as `rowsTerminated`"; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage " + @@ 
-48,7 +50,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))))"; @@ -61,6 +63,44 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + + "AND (NOT (EXISTS " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` 
" + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + } + @Override public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling(GeneratorResult operations) { @@ -69,7 +109,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandli List metadataIngestSql = 
operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); @@ -87,7 +127,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET " + - "sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + "(NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; @@ -95,7 +135,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage " + "WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink " + "WHERE sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))))"; @@ -108,14 
+148,14 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -124,7 +164,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`biz_date` = stage.`biz_date`))))"; @@ -138,14 +178,14 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void 
verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) AND " + @@ -154,7 +194,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + "(sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java index 739b22c7274..8d7be47cd26 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java @@ -24,14 +24,14 @@ public String getExpectedSqlForMetadata() { return "INSERT INTO bulk_load_batch_metadata " + "(`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO 
BULK_LOAD_BATCH_METADATA " + "(`BATCH_ID`, `TABLE_NAME`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`, `BATCH_SOURCE_INFO`) " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`TABLE_NAME`) = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`TABLE_NAME`) = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java index fda329b17ee..3ea9a5d4d9b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java @@ -14,19 +14,37 @@ package org.finos.legend.engine.persistence.components.relational.api; +import com.fasterxml.jackson.core.JsonProcessingException; +import 
com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.eclipse.collections.api.tuple.Pair; +import org.eclipse.collections.impl.tuple.Tuples; +import org.finos.legend.engine.persistence.components.common.DatasetFilter; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.ingestmode.DeriveMainDatasetSchemaFromStaging; -import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; -import org.finos.legend.engine.persistence.components.ingestmode.IngestModeCaseConverter; +import org.finos.legend.engine.persistence.components.common.FilterType; +import org.finos.legend.engine.persistence.components.common.OptimizationFilter; +import org.finos.legend.engine.persistence.components.executor.Executor; +import org.finos.legend.engine.persistence.components.ingestmode.*; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsCaseConverter; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.planner.Planner; import org.finos.legend.engine.persistence.components.relational.CaseConversion; +import org.finos.legend.engine.persistence.components.relational.SqlPlan; +import org.finos.legend.engine.persistence.components.relational.sql.TabularData; +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; +import org.finos.legend.engine.persistence.components.transformer.Transformer; import 
org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; -import java.util.List; +import java.util.*; + +import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; +import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD; public class ApiUtils { @@ -97,4 +115,125 @@ private static LockInfoDataset getLockInfoDataset(Datasets datasets) } return lockInfoDataset; } + + public static Optional getNextBatchId(Datasets datasets, Executor executor, + Transformer transformer, IngestMode ingestMode) + { + if (ingestMode.accept(IngestModeVisitors.IS_INGEST_MODE_TEMPORAL) || ingestMode instanceof BulkLoad) + { + LogicalPlan logicalPlanForNextBatchId = LogicalPlanFactory.getLogicalPlanForNextBatchId(datasets, ingestMode); + List tabularData = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(logicalPlanForNextBatchId)); + Optional nextBatchId = getFirstColumnValue(getFirstRowForFirstResult(tabularData)); + if (nextBatchId.isPresent()) + { + return retrieveValueAsLong(nextBatchId.get()); + } + } + return Optional.empty(); + } + + public static Optional>> getOptimizationFilterBounds(Datasets datasets, Executor executor, + Transformer transformer, IngestMode ingestMode) + { + List filters = ingestMode.accept(IngestModeVisitors.RETRIEVE_OPTIMIZATION_FILTERS); + if (!filters.isEmpty()) + { + Map> map = new HashMap<>(); + for (OptimizationFilter filter : filters) + { + LogicalPlan logicalPlanForMinAndMaxForField = LogicalPlanFactory.getLogicalPlanForMinAndMaxForField(datasets.stagingDataset(), filter.fieldName()); + List tabularData = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(logicalPlanForMinAndMaxForField)); + Map resultMap = 
getFirstRowForFirstResult(tabularData); + // Put into map only when not null + Object lower = resultMap.get(MIN_OF_FIELD); + Object upper = resultMap.get(MAX_OF_FIELD); + if (lower != null && upper != null) + { + map.put(filter, Tuples.pair(lower, upper)); + } + } + return Optional.of(map); + } + return Optional.empty(); + } + + public static List extractDatasetFilters(MetadataDataset metadataDataset, Executor executor, SqlPlan physicalPlan) throws JsonProcessingException + { + List datasetFilters = new ArrayList<>(); + List results = executor.executePhysicalPlanAndGetResults(physicalPlan); + Optional stagingFilters = results.stream() + .findFirst() + .map(TabularData::getData) + .flatMap(t -> t.stream().findFirst()) + .map(stringObjectMap -> (String) stringObjectMap.get(metadataDataset.stagingFiltersField())); + + // Convert map of Filters to List of Filters + if (stagingFilters.isPresent()) + { + Map> datasetFiltersMap = new ObjectMapper().readValue(stagingFilters.get(), new TypeReference>>() {}); + for (Map.Entry> filtersMapEntry : datasetFiltersMap.entrySet()) + { + for (Map.Entry filterEntry : filtersMapEntry.getValue().entrySet()) + { + DatasetFilter datasetFilter = DatasetFilter.of(filtersMapEntry.getKey(), FilterType.fromName(filterEntry.getKey()), filterEntry.getValue()); + datasetFilters.add(datasetFilter); + } + } + } + return datasetFilters; + } + + public static List getDataSplitRanges(Executor executor, Planner planner, + Transformer transformer, IngestMode ingestMode) + { + List dataSplitRanges = new ArrayList<>(); + if (ingestMode.versioningStrategy() instanceof AllVersionsStrategy) + { + Dataset stagingDataset = planner.stagingDataset(); + String dataSplitField = ingestMode.dataSplitField().get(); + LogicalPlan logicalPlanForMaxOfField = LogicalPlanFactory.getLogicalPlanForMaxOfField(stagingDataset, dataSplitField); + List tabularData = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(logicalPlanForMaxOfField)); + Map row 
= getFirstRowForFirstResult(tabularData); + Long maxDataSplit = retrieveValueAsLong(row.get(MAX_OF_FIELD)).orElseThrow(IllegalStateException::new); + for (int i = 1; i <= maxDataSplit; i++) + { + dataSplitRanges.add(DataSplitRange.of(i, i)); + } + } + return dataSplitRanges; + } + + public static Optional retrieveValueAsLong(Object obj) + { + if (obj instanceof Integer) + { + return Optional.of(Long.valueOf((Integer) obj)); + } + else if (obj instanceof Long) + { + return Optional.of((Long) obj); + } + return Optional.empty(); + } + + public static Map getFirstRowForFirstResult(List tabularData) + { + Map resultMap = tabularData.stream() + .findFirst() + .map(TabularData::getData) + .flatMap(t -> t.stream().findFirst()) + .orElse(Collections.emptyMap()); + return resultMap; + } + + public static Optional getFirstColumnValue(Map row) + { + Optional object = Optional.empty(); + if (!row.isEmpty()) + { + String key = row.keySet().stream().findFirst().orElseThrow(IllegalStateException::new); + object = Optional.ofNullable(row.get(key)); + } + return object; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 3cfc890a74f..b88ef115205 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.api; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -56,10 +57,14 @@ public abstract class GeneratorResultAbstract public abstract Optional metadataIngestSqlPlan(); + public abstract Optional deduplicationAndVersioningSqlPlan(); + public abstract SqlPlan postActionsSqlPlan(); public abstract Optional postCleanupSqlPlan(); + public abstract Map deduplicationAndVersioningErrorChecksSqlPlan(); + public abstract Map preIngestStatisticsSqlPlan(); public abstract Map postIngestStatisticsSqlPlan(); @@ -99,6 +104,11 @@ public List metadataIngestSql() return metadataIngestSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); } + public List deduplicationAndVersioningSql() + { + return deduplicationAndVersioningSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); + } + public List postActionsSql() { return postActionsSqlPlan().getSqlList(); @@ -117,6 +127,14 @@ public Map preIngestStatisticsSql() k -> preIngestStatisticsSqlPlan().get(k).getSql())); } + public Map deduplicationAndVersioningErrorChecksSql() + { + return deduplicationAndVersioningErrorChecksSqlPlan().keySet().stream() + .collect(Collectors.toMap( + k -> k, + k -> deduplicationAndVersioningErrorChecksSqlPlan().get(k).getSql())); + } + public Map postIngestStatisticsSql() { return postIngestStatisticsSqlPlan().keySet().stream() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index e597d6451bb..2858ce87f80 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.api; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; @@ -45,7 +46,6 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import java.util.UUID; @Immutable @Style( @@ -242,6 +242,21 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann planner = Planners.get(datasets.withMainDataset(schemaEvolutionDataset.get()), ingestMode, plannerOptions(), relationalSink().capabilities()); } + // deduplication and versioning + LogicalPlan deduplicationAndVersioningLogicalPlan = 
planner.buildLogicalPlanForDeduplicationAndVersioning(resources); + Optional deduplicationAndVersioningSqlPlan = Optional.empty(); + if (deduplicationAndVersioningLogicalPlan != null) + { + deduplicationAndVersioningSqlPlan = Optional.of(transformer.generatePhysicalPlan(deduplicationAndVersioningLogicalPlan)); + } + + Map deduplicationAndVersioningErrorChecksLogicalPlan = planner.buildLogicalPlanForDeduplicationAndVersioningErrorChecks(resources); + Map deduplicationAndVersioningErrorChecksSqlPlan = new HashMap<>(); + for (DedupAndVersionErrorStatistics statistic : deduplicationAndVersioningErrorChecksLogicalPlan.keySet()) + { + deduplicationAndVersioningErrorChecksSqlPlan.put(statistic, transformer.generatePhysicalPlan(deduplicationAndVersioningErrorChecksLogicalPlan.get(statistic))); + } + // ingest LogicalPlan ingestLogicalPlan = planner.buildLogicalPlanForIngest(resources); SqlPlan ingestSqlPlan = transformer.generatePhysicalPlan(ingestLogicalPlan); @@ -282,6 +297,8 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann .postActionsSqlPlan(postActionsSqlPlan) .postCleanupSqlPlan(postCleanupSqlPlan) .metadataIngestSqlPlan(metaDataIngestSqlPlan) + .deduplicationAndVersioningSqlPlan(deduplicationAndVersioningSqlPlan) + .putAllDeduplicationAndVersioningErrorChecksSqlPlan(deduplicationAndVersioningErrorChecksSqlPlan) .putAllPreIngestStatisticsSqlPlan(preIngestStatisticsSqlPlan) .putAllPostIngestStatisticsSqlPlan(postIngestStatisticsSqlPlan) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 
c7f1f7ab612..12faa47d745 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -15,33 +15,16 @@ package org.finos.legend.engine.persistence.components.relational.api; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; import org.eclipse.collections.api.tuple.Pair; -import org.eclipse.collections.impl.tuple.Tuples; -import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.OptimizationFilter; -import org.finos.legend.engine.persistence.components.common.Resources; -import org.finos.legend.engine.persistence.components.common.DatasetFilter; -import org.finos.legend.engine.persistence.components.common.FilterType; -import org.finos.legend.engine.persistence.components.common.StatisticName; +import org.finos.legend.engine.persistence.components.common.*; import org.finos.legend.engine.persistence.components.executor.DigestInfo; import org.finos.legend.engine.persistence.components.executor.Executor; import org.finos.legend.engine.persistence.components.importer.Importer; import org.finos.legend.engine.persistence.components.importer.Importers; -import org.finos.legend.engine.persistence.components.ingestmode.DeriveMainDatasetSchemaFromStaging; -import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; -import org.finos.legend.engine.persistence.components.ingestmode.IngestModeOptimizationColumnHandler; -import 
org.finos.legend.engine.persistence.components.ingestmode.IngestModeVisitors; -import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.ingestmode.*; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetReference; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.ExternalDatasetReference; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; -import org.finos.legend.engine.persistence.components.ingestmode.TempDatasetsEnricher; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.planner.Planner; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; @@ -62,25 +45,16 @@ import org.immutables.value.Value.Derived; import org.immutables.value.Value.Immutable; import org.immutables.value.Value.Style; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import java.sql.Date; import java.time.Clock; import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Arrays; -import java.util.Set; -import java.util.stream.Collectors; - -import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; -import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD; 
+import java.util.*; + import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.TABLE_IS_NON_EMPTY; +import static org.finos.legend.engine.persistence.components.relational.api.ApiUtils.*; import static org.finos.legend.engine.persistence.components.relational.api.RelationalGeneratorAbstract.BULK_LOAD_BATCH_STATUS_PATTERN; import static org.finos.legend.engine.persistence.components.transformer.Transformer.TransformOptionsAbstract.DATE_TIME_FORMATTER; @@ -240,14 +214,26 @@ public Datasets evolve(Datasets datasets) return this.enrichedDatasets; } + /* + - Perform deduplication and versioning + */ + public Datasets dedupAndVersion(Datasets datasets) + { + LOGGER.info("Invoked dedupAndVersion method, will perform Deduplication and Versioning"); + init(datasets); + dedupAndVersion(); + return this.enrichedDatasets; + } + /* - Perform ingestion from staging to main dataset based on the Ingest mode, executes in current transaction */ - public IngestorResult ingest(Datasets datasets) + public List ingest(Datasets datasets) { LOGGER.info("Invoked ingest method, will perform the ingestion"); init(datasets); - IngestorResult result = ingest(Arrays.asList()).stream().findFirst().orElseThrow(IllegalStateException::new); + List dataSplitRanges = ApiUtils.getDataSplitRanges(executor, planner, transformer, ingestMode()); + List result = ingest(dataSplitRanges); LOGGER.info("Ingestion completed"); return result; } @@ -272,10 +258,10 @@ public Datasets cleanUp(Datasets datasets) 4. Ingestion from staging to main dataset in a transaction 5.
Clean up of temporary tables */ - public IngestorResult performFullIngestion(RelationalConnection connection, Datasets datasets) + public List performFullIngestion(RelationalConnection connection, Datasets datasets) { LOGGER.info("Invoked performFullIngestion method"); - return performFullIngestion(connection, datasets, null).stream().findFirst().orElseThrow(IllegalStateException::new); + return performFullIngestion(connection, datasets, new ArrayList<>()); } /* @@ -314,7 +300,7 @@ public List getLatestStagingFilters(RelationalConnection connecti Transformer transformer = new RelationalTransformer(relationalSink(), transformOptions()); Executor executor = relationalSink().getRelationalExecutor(connection); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); - return extractDatasetFilters(metadataDataset, executor, physicalPlan); + return ApiUtils.extractDatasetFilters(metadataDataset, executor, physicalPlan); } // ---------- UTILITY METHODS ---------- @@ -335,6 +321,34 @@ private void createAllDatasets() executor.executePhysicalPlan(generatorResult.preActionsSqlPlan()); } + private void dedupAndVersion() + { + if (generatorResult.deduplicationAndVersioningSqlPlan().isPresent()) + { + LOGGER.info("Executing Deduplication and Versioning"); + executor.executePhysicalPlan(generatorResult.deduplicationAndVersioningSqlPlan().get()); + Map errorStatistics = executeDeduplicationAndVersioningErrorChecks(executor, generatorResult.deduplicationAndVersioningErrorChecksSqlPlan()); + /* Error Checks + 1. if Dedup = fail on dups, Fail the job if count > 1 + 2. 
If versioning = Max Version / All Versions, check for data errors + */ + Optional maxDuplicatesValue = retrieveValueAsLong(errorStatistics.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Optional maxDataErrorsValue = retrieveValueAsLong(errorStatistics.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + if (maxDuplicatesValue.isPresent() && maxDuplicatesValue.get() > 1) + { + String errorMessage = "Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy"; + LOGGER.error(errorMessage); + throw new RuntimeException(errorMessage); + } + if (maxDataErrorsValue.isPresent() && maxDataErrorsValue.get() > 1) + { + String errorMessage = "Encountered Data errors (same PK, same version but different data), hence failing the batch"; + LOGGER.error(errorMessage); + throw new RuntimeException(errorMessage); + } + } + } + private void initializeLock() { if (enableConcurrentSafety()) @@ -404,6 +418,14 @@ private List performFullIngestion(RelationalConnection connectio // Evolve Schema evolveSchema(); + // Dedup and Version + dedupAndVersion(); + // Find the data split ranges based on the result of dedup and versioning + if (dataSplitRanges.isEmpty()) + { + dataSplitRanges = ApiUtils.getDataSplitRanges(executor, planner, transformer, ingestMode()); + } + // Perform Ingestion List result; try @@ -622,14 +644,8 @@ private boolean datasetEmpty(Dataset dataset, Transformer trans LogicalPlan checkIsDatasetEmptyLogicalPlan = LogicalPlanFactory.getLogicalPlanForIsDatasetEmpty(dataset); SqlPlan physicalPlanForCheckIsDataSetEmpty = transformer.generatePhysicalPlan(checkIsDatasetEmptyLogicalPlan); List results = executor.executePhysicalPlanAndGetResults(physicalPlanForCheckIsDataSetEmpty); - - String value = String.valueOf(results.stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .map(Map::values) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new)); + Optional obj =
getFirstColumnValue(getFirstRowForFirstResult(results)); + String value = String.valueOf(obj.orElseThrow(IllegalStateException::new)); return !value.equals(TABLE_IS_NON_EMPTY); } @@ -637,18 +653,29 @@ private Map executeStatisticsPhysicalPlan(Executor statisticsSqlPlan, Map placeHolderKeyValues) { - return statisticsSqlPlan.keySet() - .stream() - .collect(Collectors.toMap( - k -> k, - k -> executor.executePhysicalPlanAndGetResults(statisticsSqlPlan.get(k), placeHolderKeyValues) - .stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .map(Map::values) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new))); + Map results = new HashMap<>(); + for (Map.Entry entry: statisticsSqlPlan.entrySet()) + { + List result = executor.executePhysicalPlanAndGetResults(entry.getValue(), placeHolderKeyValues); + Optional obj = getFirstColumnValue(getFirstRowForFirstResult(result)); + Object value = obj.orElse(null); + results.put(entry.getKey(), value); + } + return results; + } + + private Map executeDeduplicationAndVersioningErrorChecks(Executor executor, + Map errorChecksPlan) + { + Map results = new HashMap<>(); + for (Map.Entry entry: errorChecksPlan.entrySet()) + { + List result = executor.executePhysicalPlanAndGetResults(entry.getValue()); + Optional obj = getFirstColumnValue(getFirstRowForFirstResult(result)); + Object value = obj.orElse(null); + results.put(entry.getKey(), value); + } + return results; } private Map extractPlaceHolderKeyValues(Datasets datasets, Executor executor, @@ -656,8 +683,8 @@ private Map extractPlaceHolderKeyValues(Datasets datasets, Execu Optional dataSplitRange) { Map placeHolderKeyValues = new HashMap<>(); - Optional nextBatchId = getNextBatchId(datasets, executor, transformer, ingestMode); - Optional>> optimizationFilters = getOptimizationFilterBounds(datasets, executor, transformer, ingestMode); + Optional nextBatchId = ApiUtils.getNextBatchId(datasets, executor, 
transformer, ingestMode); + Optional>> optimizationFilters = ApiUtils.getOptimizationFilterBounds(datasets, executor, transformer, ingestMode); if (nextBatchId.isPresent()) { LOGGER.info(String.format("Obtained the next Batch id: %s", nextBatchId.get())); @@ -694,88 +721,4 @@ else if (lowerBound instanceof Number) return placeHolderKeyValues; } - private Optional getNextBatchId(Datasets datasets, Executor executor, - Transformer transformer, IngestMode ingestMode) - { - if (ingestMode.accept(IngestModeVisitors.IS_INGEST_MODE_TEMPORAL) || ingestMode instanceof BulkLoad) - { - LogicalPlan logicalPlanForNextBatchId = LogicalPlanFactory.getLogicalPlanForNextBatchId(datasets, ingestMode); - List tabularData = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(logicalPlanForNextBatchId)); - Optional nextBatchId = Optional.ofNullable(tabularData.stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .map(Map::values) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new)); - if (nextBatchId.isPresent()) - { - if (nextBatchId.get() instanceof Integer) - { - return Optional.of(Long.valueOf((Integer) nextBatchId.get())); - } - if (nextBatchId.get() instanceof Long) - { - return Optional.of((Long) nextBatchId.get()); - } - } - } - return Optional.empty(); - } - - private Optional>> getOptimizationFilterBounds(Datasets datasets, Executor executor, - Transformer transformer, IngestMode ingestMode) - { - List filters = ingestMode.accept(IngestModeVisitors.RETRIEVE_OPTIMIZATION_FILTERS); - if (!filters.isEmpty()) - { - Map> map = new HashMap<>(); - for (OptimizationFilter filter : filters) - { - LogicalPlan logicalPlanForMinAndMaxForField = LogicalPlanFactory.getLogicalPlanForMinAndMaxForField(datasets.stagingDataset(), filter.fieldName()); - List tabularData = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(logicalPlanForMinAndMaxForField)); - Map resultMap = 
tabularData.stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new); - // Put into map only when not null - Object lower = resultMap.get(MIN_OF_FIELD); - Object upper = resultMap.get(MAX_OF_FIELD); - if (lower != null && upper != null) - { - map.put(filter, Tuples.pair(lower, upper)); - } - } - return Optional.of(map); - } - return Optional.empty(); - } - - private List extractDatasetFilters(MetadataDataset metadataDataset, Executor executor, SqlPlan physicalPlan) throws JsonProcessingException - { - List datasetFilters = new ArrayList<>(); - List results = executor.executePhysicalPlanAndGetResults(physicalPlan); - Optional stagingFilters = results.stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .map(stringObjectMap -> (String) stringObjectMap.get(metadataDataset.stagingFiltersField())); - - // Convert map of Filters to List of Filters - if (stagingFilters.isPresent()) - { - Map> datasetFiltersMap = new ObjectMapper().readValue(stagingFilters.get(), new TypeReference>>() {}); - for (Map.Entry> filtersMapEntry : datasetFiltersMap.entrySet()) - { - for (Map.Entry filterEntry : filtersMapEntry.getValue().entrySet()) - { - DatasetFilter datasetFilter = DatasetFilter.of(filtersMapEntry.getKey(), FilterType.fromName(filterEntry.getKey()), filterEntry.getValue()); - datasetFilters.add(datasetFilter); - } - } - } - return datasetFilters; - } - } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java index 6a345b82ebe..52423165eab 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java @@ -24,6 +24,7 @@ public enum FunctionName MAX("MAX"), MIN("MIN"), COUNT("COUNT"), + DISTINCT("DISTINCT"), COALESCE("COALESCE"), CURRENT_TIME("CURRENT_TIME"), CURRENT_DATE("CURRENT_DATE"), @@ -36,6 +37,7 @@ public enum FunctionName UPPER("UPPER"), SUBSTRING("SUBSTRING"), ROW_NUMBER("ROW_NUMBER"), + DENSE_RANK("DENSE_RANK"), DATE("DATE"), DATE_TRUNC("DATE_TRUNC"), DATETIME_TRUNC("DATETIME_TRUNC"), diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/SchemaDefinitionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/SchemaDefinitionVisitor.java index 85b77b168b4..e0485bb683a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/SchemaDefinitionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/SchemaDefinitionVisitor.java @@ -17,6 +17,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Index; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; import org.finos.legend.engine.persistence.components.relational.sqldom.constraints.column.ColumnConstraint; @@ -63,6 +64,10 @@ public VisitorResult visit(PhysicalPlanNode prev, SchemaDefinition current, Visi columnConstraints.add(new UniqueColumnConstraint()); } Column column = new Column(f.name(), dataType, columnConstraints, context.quoteIdentifier()); + for (Optimizer optimizer : context.optimizers()) + { + column = (Column) optimizer.optimize(column); + } prev.push(column); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index fcefd9b14ef..0341dd2620d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -23,6 +23,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -52,6 +53,7 @@ import java.util.ArrayList; import java.util.Comparator; import java.util.Set; +import java.util.stream.Collectors; public class BaseTest { @@ -66,7 +68,7 @@ public class BaseTest protected final ZonedDateTime fixedExecutionZonedDateTime1 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); protected final Clock fixedClock_2000_01_01 = Clock.fixed(fixedExecutionZonedDateTime1.toInstant(), ZoneOffset.UTC); - protected ZonedDateTime fixedExecutionZonedDateTime2 = ZonedDateTime.of(2000, 1, 2, 0, 0, 0, 0, ZoneOffset.UTC); + protected ZonedDateTime fixedExecutionZonedDateTime2 = ZonedDateTime.of(2000, 1, 2, 0, 0, 0, 123456000, ZoneOffset.UTC); protected Clock fixedClock_2000_01_02 = Clock.fixed(fixedExecutionZonedDateTime2.toInstant(), ZoneOffset.UTC); protected final ZonedDateTime fixedExecutionZonedDateTime3 = ZonedDateTime.of(2000, 1, 3, 0, 0, 0, 0, ZoneOffset.UTC); @@ -104,6 +106,16 @@ public void tearDown() throws Exception h2Sink.executeStatement("DROP ALL OBJECTS"); } + protected void createStagingTableWithoutPks(DatasetDefinition stagingTable) throws Exception + { + List fieldsWithoutPk = stagingTable.schema().fields().stream().map(field -> field.withPrimaryKey(false)).collect(Collectors.toList()); + stagingTable = stagingTable.withSchema(stagingTable.schema().withFields(fieldsWithoutPk)); + RelationalTransformer transformer = new RelationalTransformer(H2Sink.get()); + LogicalPlan tableCreationPlan = LogicalPlanFactory.getDatasetCreationPlan(stagingTable, true); + SqlPlan tableCreationPhysicalPlan = 
transformer.generatePhysicalPlan(tableCreationPlan); + executor.executePhysicalPlan(tableCreationPhysicalPlan); + } + protected void createStagingTable(DatasetDefinition stagingTable) throws Exception { RelationalTransformer transformer = new RelationalTransformer(H2Sink.get()); @@ -133,9 +145,9 @@ protected IngestorResult executePlansAndVerifyResults(IngestMode ingestMode, Pla return executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPath, expectedStats, Clock.systemUTC()); } - protected IngestorResult executePlansAndVerifyResults(IngestMode ingestMode, PlannerOptions options, Datasets datasets, String[] schema, String expectedDataPath, Map expectedStats, Set userCapabilitySet) throws Exception + protected IngestorResult executePlansAndVerifyResults(IngestMode ingestMode, PlannerOptions options, Datasets datasets, String[] schema, String expectedDataPath, Map expectedStats, Set userCapabilitySet, Clock executionTimestampClock) throws Exception { - return executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPath, expectedStats, Clock.systemUTC(), userCapabilitySet, false); + return executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPath, expectedStats, executionTimestampClock, userCapabilitySet, false); } private void verifyLatestStagingFilters(RelationalIngestor ingestor, Datasets datasets) throws Exception @@ -179,7 +191,7 @@ protected IngestorResult executePlansAndVerifyResults(RelationalIngestor ingesto String expectedDataPath, Map expectedStats, boolean verifyStagingFilters) throws Exception { // Execute physical plans - IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets).get(0); Map actualStats = result.statisticByName(); @@ -214,12 +226,12 @@ protected IngestorResult executePlansAndVerifyResults(IngestMode ingestMode, 
Pla return executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPath, expectedStats, executionTimestampClock, Collections.emptySet(), false); } - protected List executePlansAndVerifyResultsWithDataSplits(IngestMode ingestMode, PlannerOptions options, Datasets datasets, String[] schema, String expectedDataPath, List> expectedStats, List dataSplitRanges) throws Exception + protected List executePlansAndVerifyResultsWithSpecifiedDataSplits(IngestMode ingestMode, PlannerOptions options, Datasets datasets, String[] schema, String expectedDataPath, List> expectedStats, List dataSplitRanges) throws Exception { - return executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPath, expectedStats, dataSplitRanges, Clock.systemUTC()); + return executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPath, expectedStats, dataSplitRanges, Clock.systemUTC()); } - protected List executePlansAndVerifyResultsWithDataSplits(IngestMode ingestMode, PlannerOptions options, Datasets datasets, String[] schema, String expectedDataPath, List> expectedStats, List dataSplitRanges, Clock executionTimestampClock) throws Exception + protected List executePlansAndVerifyResultsWithSpecifiedDataSplits(IngestMode ingestMode, PlannerOptions options, Datasets datasets, String[] schema, String expectedDataPath, List> expectedStats, List dataSplitRanges, Clock executionTimestampClock) throws Exception { RelationalIngestor ingestor = RelationalIngestor.builder() .ingestMode(ingestMode) @@ -247,6 +259,34 @@ protected List executePlansAndVerifyResultsWithDataSplits(Ingest return results; } + protected List executePlansAndVerifyResultsWithDerivedDataSplits(IngestMode ingestMode, PlannerOptions options, Datasets datasets, String[] schema, String expectedDataPath, List> expectedStats, Clock executionTimestampClock) throws Exception + { + RelationalIngestor ingestor = RelationalIngestor.builder() + 
.ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .executionTimestampClock(executionTimestampClock) + .cleanupStagingData(options.cleanupStagingData()) + .collectStatistics(options.collectStatistics()) + .enableSchemaEvolution(options.enableSchemaEvolution()) + .build(); + + List results = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + + List> tableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); + TestUtils.assertFileAndTableDataEquals(schema, expectedDataPath, tableData); + + for (int i = 0; i < results.size(); i++) + { + Map actualStats = results.get(i).statisticByName(); + Assertions.assertEquals(expectedStats.get(i).size(), actualStats.size()); + for (String statistic : expectedStats.get(i).keySet()) + { + Assertions.assertEquals(expectedStats.get(i).get(statistic).toString(), actualStats.get(StatisticName.valueOf(statistic)).toString()); + } + } + return results; + } + protected Map createExpectedStatsMap(int incomingRecordCount, int rowsDeleted, int rowsInserted, int rowsUpdated, int rowsTerminated) { Map expectedStats = new HashMap<>(); @@ -284,9 +324,10 @@ public IngestorResult executePlansAndVerifyForCaseConversion(RelationalIngestor datasets = ingestor.create(datasets); datasets = ingestor.evolve(datasets); + datasets = ingestor.dedupAndVersion(datasets); executor.begin(); - IngestorResult result = ingestor.ingest(datasets); + IngestorResult result = ingestor.ingest(datasets).get(0); // Do more stuff if needed executor.commit(); @@ -328,6 +369,26 @@ protected void loadBasicStagingDataInUpperCase(String path) throws Exception h2Sink.executeStatement(loadSql); } + protected void loadStagingDataWithNoPk(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + + "INSERT INTO \"TEST\".\"staging\"(name, income, expiry_date) " + + "SELECT \"name\", CONVERT( \"income\", BIGINT), CONVERT( \"expiry_date\", DATE)" + + " FROM CSVREAD( '" + path 
+ "', 'name, income, expiry_date', NULL )"; + h2Sink.executeStatement(loadSql); + } + + protected void loadStagingDataWithNoPkInUpperCase(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"STAGING\";" + + "INSERT INTO \"TEST\".\"STAGING\"(NAME, INCOME, EXPIRY_DATE) " + + "SELECT \"NAME\", CONVERT( \"INCOME\", BIGINT), CONVERT( \"EXPIRY_DATE\", DATE)" + + " FROM CSVREAD( '" + path + "', 'NAME, INCOME, EXPIRY_DATE', NULL )"; + h2Sink.executeStatement(loadSql); + } + protected void loadStagingDataForWithPartition(String path) throws Exception { validateFileExists(path); @@ -338,6 +399,26 @@ protected void loadStagingDataForWithPartition(String path) throws Exception h2Sink.executeStatement(loadSql); } + protected void loadStagingDataForWithPartitionWithVersion(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + + "INSERT INTO \"TEST\".\"staging\"(date, entity, price, volume, digest, version) " + + "SELECT CONVERT( \"date\",DATE ), \"entity\", CONVERT( \"price\", DECIMAL(20,2)), CONVERT( \"volume\", BIGINT), \"digest\", CONVERT( \"version\",INT)" + + " FROM CSVREAD( '" + path + "', 'date, entity, price, volume, digest, version', NULL )"; + h2Sink.executeStatement(loadSql); + } + + protected void loadStagingDataForWithPartitionWithVersionInUpperCase(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"STAGING\";" + + "INSERT INTO \"TEST\".\"STAGING\"(DATE, ENTITY, PRICE, VOLUME, DIGEST, VERSION) " + + "SELECT CONVERT( \"DATE\",DATE ), \"ENTITY\", CONVERT( \"PRICE\", DECIMAL(20,2)), CONVERT( \"VOLUME\", BIGINT), \"DIGEST\", CONVERT( \"VERSION\",INT)" + + " FROM CSVREAD( '" + path + "', 'DATE, ENTITY, PRICE, VOLUME, DIGEST, VERSION', NULL )"; + h2Sink.executeStatement(loadSql); + } + protected void loadStagingDataWithDeleteInd(String path) throws Exception { validateFileExists(path); @@ -358,6 
+439,16 @@ protected void loadStagingDataWithVersion(String path) throws Exception h2Sink.executeStatement(loadSql); } + protected void loadStagingDataWithVersionInUpperCase(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"STAGING\";" + + "INSERT INTO \"TEST\".\"STAGING\"(ID, NAME, INCOME, START_TIME ,EXPIRY_DATE, DIGEST, VERSION) " + + "SELECT CONVERT( \"ID\",INT ), \"NAME\", CONVERT( \"INCOME\", BIGINT), CONVERT( \"START_TIME\", DATETIME), CONVERT( \"EXPIRY_DATE\", DATE), DIGEST, CONVERT( \"VERSION\",INT)" + + " FROM CSVREAD( '" + path + "', 'ID, NAME, INCOME, START_TIME, EXPIRY_DATE, DIGEST, VERSION', NULL )"; + h2Sink.executeStatement(loadSql); + } + protected void loadStagingDataWithFilter(String path) throws Exception { validateFileExists(path); @@ -448,23 +539,23 @@ protected void loadStagingDataForBitemporalFromOnlyWithDeleteInd(String path) th h2Sink.executeStatement(loadSql); } - protected void loadStagingDataForBitemporalFromOnlyWithDataSplit(String path) throws Exception + protected void loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(String path) throws Exception { validateFileExists(path); String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + - "INSERT INTO \"TEST\".\"staging\"(index, datetime, balance, digest, data_split) " + - "SELECT CONVERT( \"index\", INT), CONVERT( \"datetime\", DATETIME), CONVERT( \"balance\", BIGINT), \"digest\", CONVERT( \"data_split\", BIGINT)" + - " FROM CSVREAD( '" + path + "', 'index, datetime, balance, digest, data_split', NULL )"; + "INSERT INTO \"TEST\".\"staging\"(index, datetime, balance, digest, version, data_split) " + + "SELECT CONVERT( \"index\", INT), CONVERT( \"datetime\", DATETIME), CONVERT( \"balance\", BIGINT), \"digest\", CONVERT( \"version\", BIGINT), CONVERT( \"data_split\", BIGINT)" + + " FROM CSVREAD( '" + path + "', 'index, datetime, balance, digest, version, data_split', NULL )"; h2Sink.executeStatement(loadSql); } - protected 
void loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(String path) throws Exception + protected void loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(String path) throws Exception { validateFileExists(path); String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + - "INSERT INTO \"TEST\".\"staging\"(index, datetime, balance, digest, delete_indicator, data_split) " + - "SELECT CONVERT( \"index\", INT), CONVERT( \"datetime\", DATETIME), CONVERT( \"balance\", BIGINT), \"digest\", \"delete_indicator\", CONVERT( \"data_split\", BIGINT)" + - " FROM CSVREAD( '" + path + "', 'index, datetime, balance, digest, delete_indicator, data_split', NULL )"; + "INSERT INTO \"TEST\".\"staging\"(index, datetime, balance, digest, version, delete_indicator, data_split) " + + "SELECT CONVERT( \"index\", INT), CONVERT( \"datetime\", DATETIME), CONVERT( \"balance\", BIGINT), \"digest\", CONVERT( \"version\", BIGINT), \"delete_indicator\", CONVERT( \"data_split\", BIGINT)" + + " FROM CSVREAD( '" + path + "', 'index, datetime, balance, digest, version, delete_indicator, data_split', NULL )"; h2Sink.executeStatement(loadSql); } @@ -498,7 +589,7 @@ protected void loadStagingDataForWithoutName(String path) throws Exception h2Sink.executeStatement(loadSql); } - protected void validateFileExists(String path) throws Exception + protected static void validateFileExists(String path) throws Exception { File f = new File(path); if (!f.exists()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java index d5cae4280ad..1ed5b1b8fd7 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java @@ -253,6 +253,19 @@ public static SchemaDefinition getStagingSchemaWithVersion() .build(); } + public static SchemaDefinition getStagingSchemaWithNonPkVersion() + { + return SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(income) + .addFields(startTime) + .addFields(expiryDate) + .addFields(digest) + .addFields(version) + .build(); + } + public static SchemaDefinition getStagingSchemaWithFilterForDB() { return SchemaDefinition.builder() @@ -333,6 +346,15 @@ public static DatasetDefinition getBasicStagingTable() .build(); } + public static DatasetDefinition getStagingTableWithNoPks() + { + return DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getSchemaWithNoPKs()) + .build(); + } + public static DatasetDefinition getBasicStagingTableWithExpiryDatePk() { return DatasetDefinition.builder() @@ -351,6 +373,15 @@ public static DatasetDefinition getStagingTableWithVersion() .build(); } + public static DatasetDefinition getStagingTableWithNonPkVersion() + { + return DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getStagingSchemaWithNonPkVersion()) + .build(); + } + public static DatasetDefinition getStagingTableWithFilterForDB() { return DatasetDefinition.builder() @@ -671,6 +702,23 @@ public static DatasetDefinition getEntityPriceStagingTable() .build(); } + public static DatasetDefinition getEntityPriceWithVersionStagingTable() + { + return DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(SchemaDefinition.builder() + 
.addFields(date) + .addFields(entity) + .addFields(price) + .addFields(volume) + .addFields(digest) + .addFields(version) + .build() + ) + .build(); + } + public static DatasetDefinition getBitemporalMainTable() { return DatasetDefinition.builder() @@ -790,6 +838,25 @@ public static DatasetDefinition getBitemporalFromOnlyMainTableIdBased() .build(); } + public static DatasetDefinition getBitemporalFromOnlyMainTableWithVersionIdBased() + { + return DatasetDefinition.builder() + .group(testSchemaName) + .name(mainTableName) + .schema(SchemaDefinition.builder() + .addFields(index) + .addFields(balance) + .addFields(digest) + .addFields(version) + .addFields(startDateTime) + .addFields(endDateTime) + .addFields(batchIdIn) + .addFields(batchIdOut) + .build() + ) + .build(); + } + public static DatasetDefinition getBitemporalFromOnlyTempTableIdBased() { return DatasetDefinition.builder() @@ -808,6 +875,25 @@ public static DatasetDefinition getBitemporalFromOnlyTempTableIdBased() .build(); } + public static DatasetDefinition getBitemporalFromOnlyTempTableWithVersionIdBased() + { + return DatasetDefinition.builder() + .group(testSchemaName) + .name(tempTableName) + .schema(SchemaDefinition.builder() + .addFields(index) + .addFields(balance) + .addFields(digest) + .addFields(version) + .addFields(startDateTime) + .addFields(endDateTime) + .addFields(batchIdIn) + .addFields(batchIdOut) + .build() + ) + .build(); + } + public static DatasetDefinition getBitemporalFromOnlyTempTableWithDeleteIndicatorIdBased() { return DatasetDefinition.builder() @@ -857,7 +943,7 @@ public static DatasetDefinition getBitemporalFromOnlyStagingTableWithoutDuplicat .build(); } - public static DatasetDefinition getBitemporalFromOnlyStagingTableWithDataSplitIdBased() + public static DatasetDefinition getBitemporalFromOnlyStagingTableWithVersionWithDataSplitIdBased() { return DatasetDefinition.builder() .group(testSchemaName) @@ -867,6 +953,7 @@ public static DatasetDefinition 
getBitemporalFromOnlyStagingTableWithDataSplitId .addFields(dateTime) .addFields(balance) .addFields(digest) + .addFields(version) .addFields(dataSplit) .build() ) @@ -889,7 +976,7 @@ public static DatasetDefinition getBitemporalFromOnlyStagingTableWithDeleteIndic .build(); } - public static DatasetDefinition getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithDataSplitIdBased() + public static DatasetDefinition getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithVersionWithDataSplitIdBased() { return DatasetDefinition.builder() .group(testSchemaName) @@ -899,6 +986,7 @@ public static DatasetDefinition getBitemporalFromOnlyStagingTableWithDeleteIndic .addFields(dateTime) .addFields(balance) .addFields(digest) + .addFields(version) .addFields(deleteIndicator) .addFields(dataSplit) .build() @@ -906,7 +994,7 @@ public static DatasetDefinition getBitemporalFromOnlyStagingTableWithDeleteIndic .build(); } - public static DatasetDefinition getBitemporalFromOnlyStagingTableWithoutDuplicatesWithDeleteIndicatorWithDataSplitIdBased() + public static DatasetDefinition getBitemporalFromOnlyStagingTableWithoutDuplicatesWithDeleteIndicatorWithVersionWithDataSplitIdBased() { return DatasetDefinition.builder() .group(testSchemaName) @@ -916,6 +1004,7 @@ public static DatasetDefinition getBitemporalFromOnlyStagingTableWithoutDuplicat .addFields(dateTime) .addFields(balance) .addFields(digest) + .addFields(version) .addFields(deleteIndicator) .addFields(dataSplit) .build() @@ -934,6 +1023,7 @@ public static DatasetDefinition getSchemaEvolutionAddColumnMainTable() .addFields(startTime) .addFields(expiryDate) .addFields(digest) + .addFields(batchUpdateTimestamp) .build()) .build(); } @@ -998,6 +1088,7 @@ public static DatasetDefinition getSchemaEvolutionDataTypeConversionMainTable() .addFields(startTime) .addFields(expiryDate) .addFields(digest) + .addFields(batchUpdateTimestamp) .build()) .build(); } @@ -1046,6 +1137,7 @@ public static DatasetDefinition 
getSchemaEvolutionDataTypeConversionAndColumnNul .addFields(startTime) .addFields(expiryDate) .addFields(digest) + .addFields(batchUpdateTimestamp) .build()) .build(); } @@ -1092,6 +1184,7 @@ public static DatasetDefinition getSchemaEvolutionPKTypeDifferentMainTable() .addFields(income) .addFields(expiryDate) .addFields(digest) + .addFields(batchUpdateTimestamp) .build()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java index 2c842b5ace9..f45582ddb34 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java @@ -18,12 +18,13 @@ import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.BitemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import 
org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromAndThruDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; @@ -33,7 +34,6 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.Map; @@ -56,6 +56,7 @@ import static org.finos.legend.engine.persistence.components.TestUtils.indexName; import static org.finos.legend.engine.persistence.components.TestUtils.startDateTimeName; import static org.finos.legend.engine.persistence.components.TestUtils.valueName; +import static org.finos.legend.engine.persistence.components.TestUtils.versionName; class BitemporalDeltaWithBatchIdTest extends BaseTest { @@ -117,7 +118,7 @@ void testMilestoningSourceSpecifiesFromAndThrough() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "source_specifies_from_and_through/without_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "source_specifies_from_and_through/without_delete_ind/expected_pass3.csv"; // 1. 
Load staging table loadStagingDataForBitemp(dataPass3); @@ -183,7 +184,7 @@ void testMilestoningSourceSpecifiesFromAndThroughWithDeleteIndicator() throws Ex executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "source_specifies_from_and_through/with_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "source_specifies_from_and_through/with_delete_ind/expected_pass3.csv"; // 1. Load staging table loadStagingDataForBitempWithDeleteInd(dataPass3); @@ -238,7 +239,7 @@ void testMilestoningSourceSpecifiesFromAndThroughWithLessColumnsInStaging() thro executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "source_specifies_from_and_through/less_columns_in_staging/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "source_specifies_from_and_through/less_columns_in_staging/expected_pass3.csv"; stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMainForBitemp(dataPass3); // Execute plans and verify results @@ -500,10 +501,10 @@ void testMilestoningSourceSpecifiesFromSet2() throws Exception void testMilestoningSourceSpecifiesFromSet3WithDataSplit() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDataSplitIdBased(); - DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithVersionWithDataSplitIdBased(); + 
DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableWithVersionIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); @@ -512,7 +513,12 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplit() throws Exception BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -533,19 +539,19 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplit() throws Exception String dataPass1 = basePathForInput + "source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass1); // 2. 
Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass2.csv"; String expectedDataPass4 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass4.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass2); // 2. Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); @@ -555,21 +561,19 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplit() throws Exception expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); // ------------ Perform Pass3 (identical records) ------------------------ String dataPass3 = basePathForInput + "source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass3.csv"; - String expectedDataPass6 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass6.csv"; + 
String expectedDataPass6 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass5.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass3); // 2. Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); - dataSplitRanges.add(DataSplitRange.of(2, 2)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass6, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass6, expectedStats, dataSplitRanges); } /* @@ -579,10 +583,10 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplit() throws Exception void testMilestoningSourceSpecifiesFromSet3WithDataSplitMultiPasses() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDataSplitIdBased(); - DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithVersionWithDataSplitIdBased(); + DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableWithVersionIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); @@ -591,7 +595,12 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplitMultiPasses() throws Exc 
BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -612,25 +621,25 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplitMultiPasses() throws Exc String dataPass1 = basePathForInput + "source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass1); // 2. Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass2.csv"; String expectedDataPass2 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass2.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass2); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); // ------------ Perform Pass3 ------------------------ String expectedDataPass3 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass3.csv"; @@ -639,7 +648,7 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplitMultiPasses() throws Exc dataSplitRanges.add(DataSplitRange.of(2, 3)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); // ------------ Perform Pass4 ------------------------ String expectedDataPass4 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass4.csv"; @@ -648,28 +657,19 @@ void testMilestoningSourceSpecifiesFromSet3WithDataSplitMultiPasses() throws Exc dataSplitRanges.add(DataSplitRange.of(50, 100)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); // ------------ Perform Pass5 (identical records) ------------------------ String 
dataPass3 = basePathForInput + "source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass3.csv"; String expectedDataPass5 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass5.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass3); // 2. Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass5, expectedStats, dataSplitRanges); - - // ------------ Perform Pass6 (identical records) ------------------------ - String expectedDataPass6 = basePathForExpected + "source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass6.csv"; - // 2. Execute Plan and Verify Results - dataSplitRanges = new ArrayList<>(); - dataSplitRanges.add(DataSplitRange.of(2, 2)); - expectedStats = new ArrayList<>(); - expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass6, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass5, expectedStats, dataSplitRanges); } /* @@ -705,7 +705,7 @@ void testMilestoningSourceSpecifiesFromSet4FilterDuplicates() throws Exception .sourceDateTimeFromField(dateTimeName) .build()) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); @@ -772,11 +772,11 @@ void testMilestoningSourceSpecifiesFromSet4FilterDuplicates() throws Exception @Test void 
testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicates() throws Exception { - DatasetDefinition mainTable = TestUtils.getBitemporalFromOnlyMainTableIdBased(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDataSplitIdBased(); - DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableIdBased(); + DatasetDefinition mainTable = TestUtils.getBitemporalFromOnlyMainTableWithVersionIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithVersionWithDataSplitIdBased(); + DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableWithVersionIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); @@ -785,7 +785,12 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicates() throw BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -797,7 +802,7 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicates() throw .sourceDateTimeFromField(dateTimeName) .build()) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -807,19 +812,19 @@ void 
testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicates() throw String dataPass1 = basePathForInput + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass1); // 2. Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv"; String expectedDataPass4 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass2); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); @@ -829,21 +834,19 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicates() throw expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); // ------------ Perform Pass3 (identical records) ------------------------ String dataPass3 = basePathForInput + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv"; - String expectedDataPass6 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass6.csv"; + String expectedDataPass6 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass5.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass3); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); - dataSplitRanges.add(DataSplitRange.of(2, 2)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 0, 0)); - expectedStats.add(createExpectedStatsMap(1, 0, 0, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass6, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass6, expectedStats, dataSplitRanges); } /* @@ -853,10 +856,10 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicates() throw void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicatesMultiPasses() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDataSplitIdBased(); - DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithVersionWithDataSplitIdBased(); + DatasetDefinition tempTable = TestUtils.getBitemporalFromOnlyTempTableWithVersionIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); @@ -865,7 +868,12 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicatesMultiPas BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + 
.mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -877,7 +885,7 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicatesMultiPas .sourceDateTimeFromField(dateTimeName) .build()) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -887,25 +895,25 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicatesMultiPas String dataPass1 = basePathForInput + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass1); // 2. 
Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv"; String expectedDataPass2 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass2); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); // ------------ Perform Pass3 ------------------------ String expectedDataPass3 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv"; @@ -914,7 +922,7 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicatesMultiPas dataSplitRanges.add(DataSplitRange.of(2, 3)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); // ------------ Perform Pass4 ------------------------ String expectedDataPass4 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv"; @@ -923,28 +931,19 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicatesMultiPas dataSplitRanges.add(DataSplitRange.of(50, 100)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); // ------------ Perform Pass5 (identical records) 
------------------------ String dataPass3 = basePathForInput + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv"; String expectedDataPass5 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass5.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithVersionWithDataSplit(dataPass3); // 2. Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(1, 1)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass5, expectedStats, dataSplitRanges); - - // ------------ Perform Pass6 (identical records) ------------------------ - String expectedDataPass6 = basePathForExpected + "source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass6.csv"; - // 2. 
Execute Plan and Verify Results - dataSplitRanges = new ArrayList<>(); - dataSplitRanges.add(DataSplitRange.of(2, 2)); - expectedStats = new ArrayList<>(); - expectedStats.add(createExpectedStatsMap(1, 0, 0, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass6, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass5, expectedStats, dataSplitRanges); } /* @@ -1109,17 +1108,22 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet2() throws Exceptio @Test void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet3WithDataSplit() throws Exception { - DatasetDefinition mainTable = TestUtils.getBitemporalFromOnlyMainTableIdBased(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithDataSplitIdBased(); + DatasetDefinition mainTable = TestUtils.getBitemporalFromOnlyMainTableWithVersionIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithVersionWithDataSplitIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -1144,19 +1148,19 @@ void 
testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet3WithDataSplit() th String dataPass1 = basePathForInput + "source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass1); // 2. Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(5, 5)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass2.csv"; String expectedDataPass3 = basePathForExpected + "source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass3.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass2); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(0, 1)); @@ -1164,19 +1168,19 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet3WithDataSplit() th expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 1)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); // ------------ Perform Pass3 (identical records) ------------------------ String dataPass3 = basePathForInput + "source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass3.csv"; String expectedDataPass4 = basePathForExpected + "source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass4.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass3); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(70, 70)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 0, 2, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); } /* @@ -1186,16 +1190,21 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet3WithDataSplit() th void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet3WithDataSplitWithMultiplePasses() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithDataSplitIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithVersionWithDataSplitIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -1220,25 +1229,25 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet3WithDataSplitWithM String dataPass1 = basePathForInput + 
"source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass1); // 2. Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(5, 5)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass2.csv"; String expectedDataPass2 = basePathForExpected + "source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass2.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass2); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(0, 1)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); // ------------ Perform Pass3 ------------------------ String expectedDataPass3 = basePathForExpected + "source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass3.csv"; @@ -1247,19 +1256,19 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet3WithDataSplitWithM dataSplitRanges.add(DataSplitRange.of(2, 2)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 1)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); // ------------ Perform Pass4 (identical records) ------------------------ String dataPass3 = basePathForInput + "source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass3.csv"; String expectedDataPass4 = basePathForExpected + "source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass4.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass3); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(70, 71)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 0, 2, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); } /* @@ -1298,7 +1307,7 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet4FilterDuplicates() .deleteField(deleteIndicatorName) .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); @@ -1366,10 +1375,10 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet4FilterDuplicates() void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilterDuplicates() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithDataSplitIdBased(); - DatasetDefinition stagingTableWithoutDuplicates = TestUtils.getBitemporalFromOnlyStagingTableWithoutDuplicatesWithDeleteIndicatorWithDataSplitIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithVersionWithDataSplitIdBased(); + DatasetDefinition stagingTableWithoutDuplicates = TestUtils.getBitemporalFromOnlyStagingTableWithoutDuplicatesWithDeleteIndicatorWithVersionWithDataSplitIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, 
batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); @@ -1378,7 +1387,12 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -1394,7 +1408,7 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte .deleteField(deleteIndicatorName) .addAllDeleteValues(Arrays.asList(deleteIndicatorValuesEdgeCase)) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -1404,19 +1418,19 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte String dataPass1 = basePathForInput + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass1); // 2. 
Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(5, 5)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv"; String expectedDataPass3 = basePathForExpected + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass2); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(0, 1)); @@ -1424,19 +1438,19 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 1)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); // ------------ Perform Pass3 (identical records) ------------------------ String dataPass3 = basePathForInput + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv"; String expectedDataPass4 = basePathForExpected + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass3); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(5, 100)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 0, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); } /* @@ -1445,11 +1459,11 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte @Test void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilterDuplicatesWithMultiplePasses() throws Exception { - DatasetDefinition mainTable = TestUtils.getBitemporalFromOnlyMainTableIdBased(); - DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithDataSplitIdBased(); - DatasetDefinition stagingTableWithoutDuplicates = TestUtils.getBitemporalFromOnlyStagingTableWithoutDuplicatesWithDeleteIndicatorWithDataSplitIdBased(); + DatasetDefinition mainTable = TestUtils.getBitemporalFromOnlyMainTableWithVersionIdBased(); + DatasetDefinition stagingTable = TestUtils.getBitemporalFromOnlyStagingTableWithDeleteIndicatorWithVersionWithDataSplitIdBased(); + DatasetDefinition stagingTableWithoutDuplicates = TestUtils.getBitemporalFromOnlyStagingTableWithoutDuplicatesWithDeleteIndicatorWithVersionWithDataSplitIdBased(); - String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + String[] schema = new String[] {indexName, balanceName, digestName, versionName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; // Create staging table createStagingTable(stagingTable); @@ -1458,7 +1472,12 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte BitemporalDelta ingestMode = BitemporalDelta.builder() 
.digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -1474,7 +1493,7 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte .deleteField(deleteIndicatorName) .addAllDeleteValues(Arrays.asList(deleteIndicatorValuesEdgeCase)) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -1484,25 +1503,25 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte String dataPass1 = basePathForInput + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv"; String expectedDataPass1 = basePathForExpected + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass1); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass1); // 2. 
Execute Plan and Verify Results List dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(5, 5)); List> expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 2, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, dataSplitRanges); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv"; String expectedDataPass2 = basePathForExpected + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass2); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass2); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(0, 1)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 1, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, dataSplitRanges); // ------------ Perform Pass3 ------------------------ String expectedDataPass3 = basePathForExpected + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv"; @@ -1511,18 +1530,18 @@ void testMilestoningSourceSpecifiesFromWithDeleteIndicatorSet5WithDataSplitFilte dataSplitRanges.add(DataSplitRange.of(2, 2)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(1, 0, 0, 1, 1)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, dataSplitRanges); // ------------ Perform Pass4 (identical records) ------------------------ String dataPass3 = basePathForInput + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv"; String expectedDataPass4 = basePathForExpected + "source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv"; // 1. Load Staging table - loadStagingDataForBitemporalFromOnlyWithDeleteIndWithDataSplit(dataPass3); + loadStagingDataForBitemporalFromOnlyWithDeleteIndWithVersionWithDataSplit(dataPass3); // 2. 
Execute Plan and Verify Results dataSplitRanges = new ArrayList<>(); dataSplitRanges.add(DataSplitRange.of(0, 100)); expectedStats = new ArrayList<>(); expectedStats.add(createExpectedStatsMap(2, 0, 0, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass4, expectedStats, dataSplitRanges); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalSnapshotWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalSnapshotWithBatchIdTest.java index 59798aef55a..242816c9ce2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalSnapshotWithBatchIdTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalSnapshotWithBatchIdTest.java @@ -113,7 +113,7 @@ void testBitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); // ------------ Perform bitemporal snapshot milestoning Pass4 (Empty Batch) ------------------------ - String dataPass4 = basePathForInput + "without_partition/staging_data_pass4.csv"; + String dataPass4 = "src/test/resources/data/empty_file.csv"; String 
expectedDataPass4 = basePathForExpected + "without_partition/expected_pass4.csv"; // 1. Load Staging table loadStagingDataForBitemp(dataPass4); @@ -181,7 +181,7 @@ void testBitemporalSnapshotMilestoningLogicHasFromTimeOnly() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); // ------------ Perform bitemporal snapshot milestoning Pass4 (Empty Batch) ------------------------ - String dataPass4 = basePathForInput + "has_from_time_only/staging_data_pass4.csv"; + String dataPass4 = "src/test/resources/data/empty_file.csv"; String expectedDataPass4 = basePathForExpected + "has_from_time_only/expected_pass4.csv"; // 1. Load Staging table loadStagingDataForBitemp(dataPass4); @@ -250,7 +250,7 @@ void testBitemporalSnapshotMilestoningLogicWithPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); // ------------ Perform bitemporal snapshot milestoning Pass4 (Empty Batch) ------------------------ - String dataPass4 = basePathForInput + "with_partition/staging_data_pass4.csv"; + String dataPass4 = "src/test/resources/data/empty_file.csv"; String expectedDataPass4 = basePathForExpected + "with_partition/expected_pass4.csv"; // 1. 
Load Staging table loadStagingDataForBitemp(dataPass4); @@ -315,7 +315,7 @@ void testBitemporalSnapshotMilestoningLogicWithLessColumnsInStaging() throws Exc Scenario: Test milestoning Logic when staging table is pre populated and staging table is cleaned up in the end */ @Test - void testBitemporalSnapshotMilestoningLogicWithPartitionWithcleanStagingData() throws Exception + void testBitemporalSnapshotMilestoningLogicWithPartitionWithCleanStagingData() throws Exception { DatasetDefinition mainTable = TestUtils.getBitemporalMainTable(); DatasetDefinition stagingTable = TestUtils.getBitemporalStagingTable(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 881a54ee20d..4f09b145a66 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -137,12 +137,12 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoTaskId() throws Exce String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + - 
"{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file1.csv'," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00.000000' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file1.csv'," + "'col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); // Verify execution using ingestor @@ -284,11 +284,11 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"digest\", \"batch_id\", \"append_time\") " + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + "LAKEHOUSE_MD5(ARRAY['col_int','col_string','col_decimal','col_datetime'],ARRAY[\"col_int\",\"col_string\",\"col_decimal\",\"col_datetime\"])," + - "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file3.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00.000000' FROM 
CSVREAD('src/test/resources/data/bulk-load/input/staged_file3.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); // Verify execution using ingestor @@ -360,12 +360,12 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"DIGEST\", \"BATCH_ID\", \"APPEND_TIME\") " + "SELECT CONVERT(\"COL_INT\",INTEGER),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",DECIMAL(5,2)),CONVERT(\"COL_DATETIME\",TIMESTAMP)," + "LAKEHOUSE_MD5(ARRAY['COL_INT','COL_STRING','COL_DECIMAL','COL_DATETIME'],ARRAY[\"COL_INT\",\"COL_STRING\",\"COL_DECIMAL\",\"COL_DATETIME\"])," + - "(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00' " + + "(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00.000000' " + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file4.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as 
my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); // Verify execution using ingestor diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java index ef070aaa5b0..95135656318 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java @@ -87,7 +87,7 @@ public void testMultiIngestionTypes() throws Exception .enableConcurrentSafety(true) .build(); - IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets).get(0); MultiTableIngestionTest.verifyResults(1, schema, expectedPath, "main", result, expectedStats); // Pass 2 : unitemporalDelta @@ -106,7 +106,7 @@ public void testMultiIngestionTypes() throws Exception .enableConcurrentSafety(true) .build(); - result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + result = 
ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets).get(0); MultiTableIngestionTest.verifyResults(2, schema, expectedPath, "main", result, expectedStats); // Pass 3 : unitemporalSnapshot @@ -125,7 +125,7 @@ public void testMultiIngestionTypes() throws Exception .enableConcurrentSafety(true) .build(); - result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets).get(0); MultiTableIngestionTest.verifyResults(3, schema, expectedPath, "main", result, expectedStats); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java index 28c7995ade2..ca85039087d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java @@ -90,7 +90,7 @@ public void run() .executionTimestampClock(clock) .build(); - IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets).get(0); if (maxBatchIdCounter.get() < result.batchId().get()) { maxBatchIdCounter.set(result.batchId().get()); 
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 30e8b98d60f..11d59c8ad52 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -22,11 +22,15 @@ import org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; -import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; @@ -40,274 +44,367 @@ import java.util.Map; import static org.finos.legend.engine.persistence.components.TestUtils.batchUpdateTimeName; +import static org.finos.legend.engine.persistence.components.TestUtils.dataSplitName; import static org.finos.legend.engine.persistence.components.TestUtils.digestName; import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; import static org.finos.legend.engine.persistence.components.TestUtils.idName; import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; import static org.finos.legend.engine.persistence.components.TestUtils.nameName; import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; -import static org.finos.legend.engine.persistence.components.TestUtils.dataSplitName; +import static org.finos.legend.engine.persistence.components.TestUtils.versionName; class AppendOnlyTest extends BaseTest { private final String basePath = "src/test/resources/data/incremental-append-milestoning/"; /* Scenarios: - 1. FilterDuplicates and No Auditing - 2. Staging data is imported along with Digest field population - 3. Staging has lesser columns than main dataset - 4. Staging data cleanup - 5. FilterDuplicates and Auditing enabled - 6. Add column schema evolution - 7. implicit data type change schema evolution - 8. 
Filter Duplicates and Data Splits enabled + 1) With Auditing, NoVersion, Filter Duplicates, true - tested (perform deduplication, auditing, filter existing) + 2) No Auditing, NoVersion, Allow Duplicates, false - tested (the most basic case) + 3) With Auditing, MaxVersion, Filter Duplicates, true - tested (perform deduplication and versioning, auditing, filter existing) + 4) With Auditing, MaxVersion, Filter Duplicates, false - tested (perform deduplication and versioning, auditing) + 5) With Auditing, AllVersion, Filter Duplicates, true - tested (perform deduplication and versioning, data split, auditing, filter existing) + 6) With Auditing, AllVersion, Filter Duplicates, false - tested (perform deduplication and versioning, data split, auditing) + + Other enrichment tests: + 1) Staging data is imported along with Digest field population + 2) Staging has fewer columns than main dataset + 3) Do not create table */ - /* - Scenario: Test Append Only Logic with FilterDuplicates and No Auditing + Scenario: Test Append Only vanilla case + staging table is cleaned up in the end with upper case (2) */ @Test - void testAppendOnlyWithFilterDuplicatesAndNoAuditing() throws Exception + void testAppendOnlyVanillaUpperCase() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNoPks(); // Create staging table - createStagingTable(stagingTable); + h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"EXPIRY_DATE\" DATE)"); // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(NoAuditing.builder().build()) + 
.filterExistingRecords(false) .build(); - PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(true).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{nameName.toUpperCase(), incomeName.toUpperCase(), expiryDateName.toUpperCase()}; - // ------------ Perform incremental (append) milestoning Pass1 ------------------------ + // ------------ Perform incremental (append) milestoning With Clean Staging Table ------------------------ String dataPass1 = basePath + "input/vanilla_case/data_pass1.csv"; String expectedDataPass1 = basePath + "expected/vanilla_case/expected_pass1.csv"; // 1. Load staging table - loadBasicStagingData(dataPass1); + loadStagingDataWithNoPkInUpperCase(dataPass1); // 2. Execute plans and verify results Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - // 3. Assert that the staging table is NOT truncated - List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); - Assertions.assertEquals(stagingTableList.size(), 3); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + // 3. 
Assert that the staging table is truncated + List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"STAGING\""); + Assertions.assertEquals(stagingTableList.size(), 0); - // ------------ Perform incremental (append) milestoning Pass2 ------------------------ + // ------------ Perform incremental (append) milestoning With Clean Staging Table ------------------------ String dataPass2 = basePath + "input/vanilla_case/data_pass2.csv"; String expectedDataPass2 = basePath + "expected/vanilla_case/expected_pass2.csv"; // 1. Load staging table - loadBasicStagingData(dataPass2); + loadStagingDataWithNoPkInUpperCase(dataPass2); // 2. Execute plans and verify results - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); + expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + // 3. 
Assert that the staging table is truncated + stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"STAGING\""); + Assertions.assertEquals(stagingTableList.size(), 0); } /* - Scenario: Test Append Only Logic with FilterDuplicates and No Auditing with Upper Case Optimizer + Scenario: Test Append Only with auditing, no versioning, filter duplicates and filter existing records (1) */ @Test - void testAppendOnlyWithFilterDuplicatesAndNoAuditingWithUpperCaseOptimizer() throws Exception + void testAppendOnlyWithAuditingNoVersioningFilterDuplicatesFilterExistingRecords() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); // Create staging table - h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"ID\" INTEGER NOT NULL,\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"START_TIME\" TIMESTAMP NOT NULL,\"EXPIRY_DATE\" DATE,\"DIGEST\" VARCHAR,PRIMARY KEY (\"ID\", \"START_TIME\"))"); + createStagingTableWithoutPks(stagingTable); // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) - .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); + .digestField(digestName) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(true) + .build(); - PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName.toUpperCase(), nameName.toUpperCase(), incomeName.toUpperCase(), startTimeName.toUpperCase(), expiryDateName.toUpperCase(), 
digestName.toUpperCase()}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String dataPass1 = basePath + "input/vanilla_case/data_pass1.csv"; - String expectedDataPass1 = basePath + "expected/vanilla_case/expected_pass1.csv"; + String dataPass1 = basePath + "input/auditing_no_version_filter_dup_filter_existing/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/auditing_no_version_filter_dup_filter_existing/expected_pass1.csv"; // 1. Load staging table - loadBasicStagingDataInUpperCase(dataPass1); + loadBasicStagingData(dataPass1); + // 2. Execute plans and verify results + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + // ------------ Perform incremental (append) milestoning Pass2 ------------------------ + String dataPass2 = basePath + "input/auditing_no_version_filter_dup_filter_existing/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/auditing_no_version_filter_dup_filter_existing/expected_pass2.csv"; + // 1. Load staging table + loadBasicStagingData(dataPass2); // 2. 
Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats = createExpectedStatsMap(4, 0, 2, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); + } - List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"STAGING\""); - Assertions.assertEquals(stagingTableList.size(), 3); + /* + Scenario: Test Append Only with auditing, max version, filter duplicates and filter existing records with upper case (3) + */ + @Test + void testAppendOnlyWithAuditingMaxVersionFilterDuplicatesFilterExistingRecordsUpperCase() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNonPkVersion(); - executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + // Create staging table + h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"ID\" INTEGER NOT NULL,\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"START_TIME\" TIMESTAMP NOT NULL,\"EXPIRY_DATE\" DATE,\"DIGEST\" VARCHAR,\"VERSION\" INT)"); - // 3. 
Assert that the staging table is NOT truncated - stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"STAGING\""); - Assertions.assertEquals(stagingTableList.size(), 3); + // Generate the milestoning object + AppendOnly ingestMode = AppendOnly.builder() + .digestField(digestName) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(true) + .build(); + + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName.toUpperCase(), nameName.toUpperCase(), incomeName.toUpperCase(), startTimeName.toUpperCase(), expiryDateName.toUpperCase(), digestName.toUpperCase(), versionName.toUpperCase(), batchUpdateTimeName.toUpperCase()}; + + // ------------ Perform incremental (append) milestoning Pass1 ------------------------ + String dataPass1 = basePath + "input/auditing_max_version_filter_dup_filter_existing/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/auditing_max_version_filter_dup_filter_existing/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithVersionInUpperCase(dataPass1); + // 2. 
Execute plans and verify results + Map expectedStats = createExpectedStatsMap(4, 0, 3, 0, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); // ------------ Perform incremental (append) milestoning Pass2 ------------------------ - String dataPass2 = basePath + "input/vanilla_case/data_pass2.csv"; - String expectedDataPass2 = basePath + "expected/vanilla_case/expected_pass2.csv"; + String dataPass2 = basePath + "input/auditing_max_version_filter_dup_filter_existing/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/auditing_max_version_filter_dup_filter_existing/expected_pass2.csv"; // 1. Load staging table - loadBasicStagingDataInUpperCase(dataPass2); + loadStagingDataWithVersionInUpperCase(dataPass2); // 2. Execute plans and verify results - executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + expectedStats = createExpectedStatsMap(4, 0, 2, 0, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); } /* - Scenario: test AppendOnly when staging data is imported along with Digest field population + Scenario: Test Append Only with auditing, max version, filter duplicates and no filter existing records (4) */ @Test - void testAppendOnlyWithStagingDataImportedWithPopulateDigest() throws Exception + void testAppendOnlyWithAuditingMaxVersionFilterDuplicatesNoFilterExistingRecords() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - String dataPass1 = "src/test/resources/data/import-data/data_pass1.json"; - Dataset stagingTable = TestUtils.getJsonDatasetWithoutDigestReferenceTable(dataPass1); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNonPkVersion(); + + // Create staging table + createStagingTableWithoutPks(stagingTable); // Generate the milestoning object AppendOnly 
ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(false) .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, versionName, batchUpdateTimeName}; // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String expectedDataPass1 = "src/test/resources/data/import-data/data_expected_with_digest_pass1.csv"; - // Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 5); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + String dataPass1 = basePath + "input/auditing_max_version_filter_dup_no_filter_existing/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass1); + // 2. 
Execute plans and verify results + Map expectedStats = createExpectedStatsMap(4, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); // ------------ Perform incremental (append) milestoning Pass2 ------------------------ - String dataPass2 = "src/test/resources/data/import-data/data_pass2.json"; - stagingTable = TestUtils.getJsonDatasetWithoutDigestReferenceTable(dataPass2); - String expectedDataPass2 = "src/test/resources/data/import-data/data_expected_with_digest_pass2.csv"; - // Execute plans and verify results - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 2); - executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats); + String dataPass2 = basePath + "input/auditing_max_version_filter_dup_no_filter_existing/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass2); + // 2. 
Execute plans and verify results + expectedStats = createExpectedStatsMap(4, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); } /* - Scenario: Test AppendOnly when staging has lesser columns than main + Scenario: Test Append Only with auditing, all version, filter duplicates and filter existing records (5) */ @Test - void testAppendOnlyWithLessColumnsInStaging() throws Exception + void testAppendOnlyWithAuditingAllVersionFilterDuplicatesFilterExistingRecords() throws Exception { - DatasetDefinition mainTable = TestUtils.getBasicMainTable(); - String dataPass1 = basePath + "input/less_columns_in_staging/data_pass1.csv"; - Dataset stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass1); + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNonPkVersion(); + IncrementalClock incrementalClock = new IncrementalClock(fixedExecutionZonedDateTime1.toInstant(), ZoneOffset.UTC, 1000); + + // Create staging table + createStagingTableWithoutPks(stagingTable); // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, 
nameName, incomeName, startTimeName, expiryDateName, digestName, versionName, batchUpdateTimeName}; // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String expectedDataPass1 = basePath + "expected/less_columns_in_staging/expected_pass1.csv"; - // Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + String dataPass1 = basePath + "input/auditing_all_version_filter_dup_filter_existing/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/auditing_all_version_filter_dup_filter_existing/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass1); + // 2. 
Execute plans and verify results + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = createExpectedStatsMap(3, 0, 3, 0, 0); + Map expectedStats2 = createExpectedStatsMap(1, 0, 1, 0, 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, incrementalClock); // ------------ Perform incremental (append) milestoning Pass2 ------------------------ - String dataPass2 = basePath + "input/less_columns_in_staging/data_pass2.csv"; - stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass2); - String expectedDataPass2 = basePath + "expected/less_columns_in_staging/expected_pass2.csv"; - // Execute plans and verify results - executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats); + String dataPass2 = basePath + "input/auditing_all_version_filter_dup_filter_existing/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/auditing_all_version_filter_dup_filter_existing/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass2); + // 2. 
Execute plans and verify results + expectedStatsList = new ArrayList<>(); + expectedStats1 = createExpectedStatsMap(4, 0, 2, 0, 0); + expectedStatsList.add(expectedStats1); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, incrementalClock); } /* - Scenario: Test AppendOnly when staging table is cleaned up in the end + Scenario: Test Append Only with auditing, all version, filter duplicates and no filter existing records (6) */ @Test - void testAppendOnlyWithCleanStagingData() throws Exception + void testAppendOnlyWithAuditingAllVersionFilterDuplicatesNoFilterExistingRecords() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNonPkVersion(); + IncrementalClock incrementalClock = new IncrementalClock(fixedExecutionZonedDateTime1.toInstant(), ZoneOffset.UTC, 1000); // Create staging table - createStagingTable(stagingTable); + createStagingTableWithoutPks(stagingTable); // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(false) .build(); - PlannerOptions options = PlannerOptions.builder().cleanupStagingData(true).collectStatistics(true).build(); + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, 
startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, versionName, batchUpdateTimeName}; - // ------------ Perform incremental (append) milestoning With Clean Staging Table ------------------------ - String dataPass1 = basePath + "input/vanilla_case/data_pass1.csv"; - String expectedDataPass1 = basePath + "expected/vanilla_case/expected_pass1.csv"; + // ------------ Perform incremental (append) milestoning Pass1 ------------------------ + String dataPass1 = basePath + "input/auditing_all_version_filter_dup_no_filter_existing/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass1.csv"; // 1. Load staging table - loadBasicStagingData(dataPass1); + loadStagingDataWithVersion(dataPass1); // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = createExpectedStatsMap(3, 0, 3, 0, 0); + Map expectedStats2 = createExpectedStatsMap(1, 0, 1, 0, 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, incrementalClock); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - // 3. 
Assert that the staging table is truncated - List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); - Assertions.assertEquals(stagingTableList.size(), 0); + // ------------ Perform incremental (append) milestoning Pass2 ------------------------ + String dataPass2 = basePath + "input/auditing_all_version_filter_dup_no_filter_existing/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass2); + // 2. Execute plans and verify results + expectedStatsList = new ArrayList<>(); + expectedStats1 = createExpectedStatsMap(4, 0, 3, 0, 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, incrementalClock); } /* - Scenario: Test AppendOnly with FilterDuplicates and Auditing enabled + Scenario: test Append Only with auditing, no version, allow duplicates and filter existing records when staging data is imported along with digest field population */ @Test - void testAppendOnlyWithFilterDuplicatesAndAuditingEnabled() throws Exception + void testAppendOnlyWithStagingDataImportedWithPopulateDigest() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - String dataPass1 = basePath + "input/with_update_timestamp_field/data_pass1.csv"; - Dataset stagingTable = TestUtils.getBasicCsvDatasetReferenceTable(dataPass1); + String dataPass1 = basePath + "input/import_with_populate_digest/data_pass1.json"; + Dataset stagingTable = TestUtils.getJsonDatasetWithoutDigestReferenceTable(dataPass1); // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) +
.versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(true) .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); @@ -316,67 +413,79 @@ void testAppendOnlyWithFilterDuplicatesAndAuditingEnabled() throws Exception String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String expectedDataPass1 = basePath + "expected/with_update_timestamp_field/expected_pass1.csv"; + String expectedDataPass1 = basePath + "expected/import_with_populate_digest/expected_pass1.csv"; // Execute plans and verify results - Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + Map expectedStats = createExpectedStatsMap(5, 0, 5, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform incremental (append) milestoning Pass2 ------------------------ + String dataPass2 = basePath + "input/import_with_populate_digest/data_pass2.json"; + stagingTable = TestUtils.getJsonDatasetWithoutDigestReferenceTable(dataPass2); + String expectedDataPass2 = basePath + "expected/import_with_populate_digest/expected_pass2.csv"; + // Execute plans and verify results + expectedStats = createExpectedStatsMap(2, 0, 1, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); } /* - Scenario: Test AppendOnly with Filter Duplicates and Data Splits enabled + Scenario: Test Append Only with auditing, no version, allow duplicates and no filter existing records when staging has lesser columns than main */ @Test - void testAppendOnlyWithFilterDuplicatesAuditEnabledWithDataSplits() throws Exception + 
void testAppendOnlyWithLessColumnsInStaging() throws Exception { - DatasetDefinition mainTable = TestUtils.getMainTableWithBatchUpdateTimeField(); - String dataPass1 = basePath + "input/with_data_splits/data_pass1.csv"; - Dataset stagingTable = TestUtils.getBasicCsvDatasetReferenceTableWithDataSplits(dataPass1); - IncrementalClock incrementalClock = new IncrementalClock(fixedExecutionZonedDateTime1.toInstant(), ZoneOffset.UTC, 1000); + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + String dataPass1 = basePath + "input/less_columns_in_staging/data_pass1.csv"; + Dataset stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass1); // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) - .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) - .dataSplitField(dataSplitName) - .build(); + .digestField(digestName) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(false) + .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{batchUpdateTimeName, idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String expectedDataPass1 = basePath + "expected/with_data_splits/expected_pass1.csv"; + String expectedDataPass1 = basePath + "expected/less_columns_in_staging/expected_pass1.csv"; // Execute plans and verify results - List dataSplitRanges = new ArrayList<>(); - 
dataSplitRanges.add(DataSplitRange.of(1, 1)); - dataSplitRanges.add(DataSplitRange.of(2, 2)); - List> expectedStatsList = new ArrayList<>(); - Map expectedStats1 = createExpectedStatsMap(3, 0, 3, 0, 0); - Map expectedStats2 = createExpectedStatsMap(2, 0, 2, 0, 0); - - expectedStatsList.add(expectedStats1); - expectedStatsList.add(expectedStats2); + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, dataSplitRanges, incrementalClock); + // ------------ Perform incremental (append) milestoning Pass2 ------------------------ + String dataPass2 = basePath + "input/less_columns_in_staging/data_pass2.csv"; + stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass2); + String expectedDataPass2 = basePath + "expected/less_columns_in_staging/expected_pass2.csv"; + // Execute plans and verify results + expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); } + /* + Scenario: Test Append Only vanilla case with do not create table + */ @Test void testAppendOnlyDoNotCreateTables() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNoPks(); // Create staging table createStagingTable(stagingTable); // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) - .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); + .digestField(digestName) + 
.deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .filterExistingRecords(false) + .build(); Datasets datasets = Datasets.of(mainTable, stagingTable); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyWithDuplicatesTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyWithDuplicatesTest.java deleted file mode 100644 index a9d35446be8..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyWithDuplicatesTest.java +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2022 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; - -import org.finos.legend.engine.persistence.components.BaseTest; -import org.finos.legend.engine.persistence.components.TestUtils; -import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.StatisticName; -import org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; -import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; -import org.finos.legend.engine.persistence.components.planner.PlannerOptions; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.finos.legend.engine.persistence.components.TestUtils.digestName; -import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; -import static org.finos.legend.engine.persistence.components.TestUtils.getSchemaWithNoPKs; -import static org.finos.legend.engine.persistence.components.TestUtils.idName; -import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; -import static org.finos.legend.engine.persistence.components.TestUtils.nameName; -import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; - -class AppendOnlyWithDuplicatesTest extends BaseTest -{ - private final String basePath = "src/test/resources/data/incremental-append-milestoning/"; - /* - 
Scenarios: - 1. Allow Duplicates where PKs are provided - 2. Allow Duplicates where no PKs are provided - 3. FAIL_ON_DUPLICATES validation with primary keys empty - 4. FAIL_ON_DUPLICATES causing the test to fail - */ - - /* - Scenario: Test Append Only with ALLOW_DUPLICATES validation when primary keys are not empty - */ - @Test - void testAppendOnlyWithAllowDuplicatesWherePKsNotEmpty() throws Exception - { - DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); - - // Create staging table - createStagingTable(stagingTable); - - // Generate the milestoning object - AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) - .deduplicationStrategy(AllowDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); - Datasets datasets = Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; - - String expectedDataPass1 = basePath + "expected/allow_duplicates/expected_pass1.csv"; - // Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); - - try - { - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - - Assertions.fail("Exception was not thrown"); - } - catch (Exception e) - { - Assertions.assertEquals("Primary key list must be empty", e.getMessage()); - } - } - - /* - Scenario: Test Append Only with ALLOW_DUPLICATES and no PKs - */ - @Test - void testAppendOnlyWithAllowDuplicates() throws Exception - { - DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - String dataPass1 = basePath + "input/allow_duplicates/data_pass1.csv"; - Dataset 
stagingTable = CsvExternalDatasetReference.builder().schema(getSchemaWithNoPKs()).csvDataPath(dataPass1).build(); - - // Generate the milestoning object - AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) - .deduplicationStrategy(AllowDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - Datasets datasets = Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{nameName, incomeName, expiryDateName}; - - // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String expectedDataPass1 = basePath + "expected/allow_duplicates/expected_pass1.csv"; - // Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - - // ------------ Perform incremental (append) milestoning Pass2 ------------------------ - String dataPass2 = basePath + "input/allow_duplicates/data_pass2.csv"; - stagingTable = CsvExternalDatasetReference.builder().schema(getSchemaWithNoPKs()).csvDataPath(dataPass1).build(); - String expectedDataPass2 = basePath + "expected/allow_duplicates/expected_pass2.csv"; - // Execute plans and verify results - executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats); - } - - /* - Scenario: FAIL_ON_DUPLICATES validation with primary keys empty - */ - @Test - void testAppendOnlyWithFailOnDuplicatesValidation() throws Exception - { - DatasetDefinition mainTable = 
TestUtils.getDefaultMainTable(); - String dataPass1 = basePath + "input/allow_duplicates/data_pass1.csv"; - Dataset stagingTable = CsvExternalDatasetReference.builder().schema(getSchemaWithNoPKs()).csvDataPath(dataPass1).build(); - - // Generate the milestoning object - AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) - .deduplicationStrategy(FailOnDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - Datasets datasets = Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{nameName, incomeName, expiryDateName}; - - String expectedDataPass1 = basePath + "expected/allow_duplicates/expected_pass1.csv"; - // Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); - try - { - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - - Assertions.fail("Exception was not thrown"); - } - catch (Exception e) - { - Assertions.assertEquals("Primary key list must not be empty", e.getMessage()); - } - } - - /* - Scenario: Test Append Only with FAIL_ON_DUPLICATES strategy will cause the test to fail - */ - @Test - void testAppendOnlyWithFailOnDuplicates() throws Exception - { - DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); - - // Create staging table - createStagingTable(stagingTable); - - // Generate the milestoning object - AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) - .deduplicationStrategy(FailOnDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); - Datasets datasets = 
Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; - - // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String dataPass1 = basePath + "input/vanilla_case/data_pass1.csv"; - String expectedDataPass1 = basePath + "expected/vanilla_case/expected_pass1.csv"; - // 1. Load staging table - loadBasicStagingData(dataPass1); - // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); - - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - // 3. Assert that the staging table is NOT truncated - List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); - Assertions.assertEquals(stagingTableList.size(), 3); - - // ------------ Perform incremental (append) milestoning Pass2 ------------------------ - String dataPass2 = basePath + "input/vanilla_case/data_pass2.csv"; - String expectedDataPass2 = basePath + "expected/vanilla_case/expected_pass2.csv"; - // 1. Load staging table - loadBasicStagingData(dataPass2); - // 2. 
Execute plans and verify results - try - { - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); - Assertions.fail("Exception was not thrown"); - } - catch (Exception e) - { - Assertions.assertTrue(e.getMessage().contains("Unique index or primary key violation")); - } - } -} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaMergeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaMergeTest.java deleted file mode 100644 index f6127663533..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaMergeTest.java +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2022 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package org.finos.legend.engine.persistence.components.ingestmode.nontemporal; - -import org.finos.legend.engine.persistence.components.BaseTest; -import org.finos.legend.engine.persistence.components.TestUtils; -import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.StatisticName; -import org.finos.legend.engine.persistence.components.ingestmode.NontemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; -import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; -import org.finos.legend.engine.persistence.components.planner.PlannerOptions; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.finos.legend.engine.persistence.components.TestUtils.batchUpdateTimeName; -import static org.finos.legend.engine.persistence.components.TestUtils.digestName; -import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; -import static org.finos.legend.engine.persistence.components.TestUtils.idName; -import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; -import static org.finos.legend.engine.persistence.components.TestUtils.nameName; -import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; - -class NontemporalDeltaMergeTest extends BaseTest -{ - private final String basePath = "src/test/resources/data/incremental-delta-milestoning/"; - - /* - Scenario: Test milestoning Logic when staging table pre populated - */ - @Test - void testMilestoningStagingTablePrePopulated() throws Exception - { - DatasetDefinition mainTable = 
TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); - - // Create staging table - createStagingTable(stagingTable); - - // Generate the milestoning object - NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); - Datasets datasets = Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; - - // ------------ Perform incremental (delta) milestoning Pass1 ------------------------ - String dataPass1 = basePath + "input/vanilla_case/data_pass1.csv"; - String expectedDataPass1 = basePath + "expected/vanilla_case/expected_pass1.csv"; - // 1. Load staging table - loadBasicStagingData(dataPass1); - // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - // 3. Assert that the staging table is NOT truncated - List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); - Assertions.assertEquals(stagingTableList.size(), 3); - - // ------------ Perform incremental (delta) milestoning Pass2 ------------------------ - String dataPass2 = basePath + "input/vanilla_case/data_pass2.csv"; - String expectedDataPass2 = basePath + "expected/vanilla_case/expected_pass2.csv"; - // 1. Load staging table - loadBasicStagingData(dataPass2); - // 2. 
Execute plans and verify results - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); - } - - /* - Scenario: Test milestoning Logic when staging data comes from CSV and has less columns than main dataset - */ - @Test - void testIncrementalDeltaMilestoningLogicWithLessColumnsInStaging() throws Exception - { - DatasetDefinition mainTable = TestUtils.getBasicMainTable(); - String dataPass1 = basePath + "input/less_columns_in_staging/data_pass1.csv"; - Dataset stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass1); - - // Generate the milestoning object - NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - Datasets datasets = Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; - - // ------------ Perform incremental (delta) milestoning Pass1 ------------------------ - String expectedDataPass1 = basePath + "expected/less_columns_in_staging/expected_pass1.csv"; - // Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); - - // ------------ Perform incremental (delta) milestoning Pass2 ------------------------ - String dataPass2 = basePath + "input/less_columns_in_staging/data_pass2.csv"; - String expectedDataPass2 = basePath + "expected/less_columns_in_staging/expected_pass2.csv"; - stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass2); - // Execute plans and verify results - 
executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats); - } - - /* - Scenario: Test milestoning Logic when staging table is pre populated - and isUpdateBatchTimeEnabled is enabled - */ - @Test - void testGeneratePhysicalPlanWithUpdateTimestampColumn() throws Exception - { - DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); - - // Create staging table - createStagingTable(stagingTable); - - // Generate the milestoning object - NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - Datasets datasets = Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; - - // ------------ Perform incremental (delta) milestoning Pass1 ------------------------ - String dataPass1 = basePath + "input/with_update_timestamp_field/data_pass1.csv"; - String expectedDataPass1 = basePath + "expected/with_update_timestamp_field/expected_pass1.csv"; - // 1. Load staging table - loadBasicStagingData(dataPass1); - // 2. 
Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); - } - - @Test - void testGeneratePhysicalPlanWithDeleteIndicator() throws Exception - { - DatasetDefinition mainTable = TestUtils.getMainTableWithBatchUpdateTimeField(); - DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); - - // Create staging table - createStagingTable(stagingTable); - - // Generate the milestoning object - NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) - .build(); - - PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - Datasets datasets = Datasets.of(mainTable, stagingTable); - - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; - - // ------------ Perform incremental (delta) milestoning Pass1 ------------------------ - String dataPass1 = basePath + "input/with_update_timestamp_field/data_pass1.csv"; - String expectedDataPass1 = basePath + "expected/with_update_timestamp_field/expected_pass1.csv"; - // 1. Load staging table - loadBasicStagingData(dataPass1); - // 2. 
Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); - } -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index ac929288621..b0dfc3d65a8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -17,19 +17,27 @@ import java.util.Arrays; import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.TestUtils; +import org.finos.legend.engine.persistence.components.common.DatasetFilter; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FilterType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.NontemporalDelta; import 
org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningComparator; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionColumnBasedResolver; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; +import org.finos.legend.engine.persistence.components.versioning.TestDedupAndVersioning; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -37,18 +45,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.logging.Filter; -import static org.finos.legend.engine.persistence.components.TestUtils.batchUpdateTimeName; -import static 
org.finos.legend.engine.persistence.components.TestUtils.deleteIndicatorName; -import static org.finos.legend.engine.persistence.components.TestUtils.deleteIndicatorValues; -import static org.finos.legend.engine.persistence.components.TestUtils.digestName; -import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; -import static org.finos.legend.engine.persistence.components.TestUtils.idName; -import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; -import static org.finos.legend.engine.persistence.components.TestUtils.nameName; -import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; -import static org.finos.legend.engine.persistence.components.TestUtils.dataSplitName; -import static org.finos.legend.engine.persistence.components.TestUtils.versionName; +import static org.finos.legend.engine.persistence.components.TestUtils.*; class NontemporalDeltaTest extends BaseTest { @@ -77,9 +76,9 @@ void testNonTemporalDeltaWithNoAuditing() throws Exception // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -127,9 +126,9 @@ void testNonTemporalDeltaWithDeleteIndicator() throws Exception .digestField(digestName) .auditing(NoAuditing.builder().build()) .mergeStrategy(DeleteIndicatorMergeStrategy.builder() - .deleteField(deleteIndicatorName) - .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) - .build()) + .deleteField(deleteIndicatorName) + .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) + .build()) .build(); PlannerOptions options = 
PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -180,9 +179,9 @@ void testNonTemporalDeltaWithLessColumnsInStaging() throws Exception // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -211,19 +210,20 @@ void testNonTemporalDeltaWithLessColumnsInStaging() throws Exception Scenario: Test NonTemporal Delta when staging table is cleaned up in the end */ @Test - void testNonTemporalDeltaWithCleanStagingData() throws Exception + void testNonTemporalDeltaWithCleanStagingDataWithFailOnDups() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); // Create staging table - createStagingTable(stagingTable); + createStagingTableWithoutPks(stagingTable); // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(true).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -245,25 +245,39 @@ void testNonTemporalDeltaWithCleanStagingData() throws Exception // 3. 
Assert that the staging table is truncated List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); Assertions.assertEquals(stagingTableList.size(), 0); + + // ------------ Perform incremental (delta) milestoning Fail on Dups ------------------------ + String dataPass2 = basePath + "input/with_duplicates/data_pass1.csv"; + loadBasicStagingData(dataPass2); + try + { + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + Assertions.fail("Should not Succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } } /* Scenario: Test NonTemporal Delta when Auditing is enabled */ @Test - void testNonTemporalDeltaWithAuditing() throws Exception + void testNonTemporalDeltaWithAuditingFilterDuplicates() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); // Create staging table - createStagingTable(stagingTable); + createStagingTableWithoutPks(stagingTable); // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) - .build(); + .digestField(digestName) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -277,7 +291,7 @@ void testNonTemporalDeltaWithAuditing() throws Exception loadBasicStagingData(dataPass1); // 2. 
Execute plans and verify results Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 5); expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); @@ -287,7 +301,7 @@ void testNonTemporalDeltaWithAuditing() throws Exception Scenario: Test NonTemporal Delta when Data splits are enabled */ @Test - void testNonTemporalDeltaNoAuditingWithDataSplits() throws Exception + void testNonTemporalDeltaNoAuditingWithAllVersionDoNotPerform() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); String dataPass1 = basePath + "input/with_data_splits/data_pass1.csv"; @@ -296,7 +310,11 @@ void testNonTemporalDeltaNoAuditingWithDataSplits() throws Exception // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(expiryDateName) + .dataSplitFieldName(dataSplitName) + .performStageVersioning(false) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) .auditing(NoAuditing.builder().build()) .build(); @@ -323,7 +341,7 @@ void testNonTemporalDeltaNoAuditingWithDataSplits() throws Exception expectedStats2.put(StatisticName.ROWS_TERMINATED.name(), 0); expectedStatsList.add(expectedStats1); expectedStatsList.add(expectedStats2); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, dataSplitRanges); } @Test @@ -337,14 +355,14 @@ void 
testNonTemporalDeltaWithMaxVersioningGreaterThan() throws Exception // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(false) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(false) + .build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -390,14 +408,14 @@ void testNonTemporalDeltaWithMaxVersioningGreaterThanEqualTo() throws Exception // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(false) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(false) + .build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -443,14 +461,14 @@ void testNonTemporalDeltaWithMaxVersioningGreaterThanWithDedup() throws Exceptio // Generate the milestoning object NontemporalDelta 
ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(true) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(true) + .build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -496,14 +514,14 @@ void testNonTemporalDeltaWithMaxVersioningGreaterThanEqualToWithDedup() throws E // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(true) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(true) + .build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -550,9 +568,9 @@ void testNonTemporalDeltaWithFilterStagingTable() throws Exception // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .build(); + 
.digestField(digestName) + .auditing(NoAuditing.builder().build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -601,14 +619,14 @@ void testNonTemporalDeltaWithFilterStagingTableWithMaxVersioningGreaterThan() th // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(false) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(false) + .build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -657,14 +675,14 @@ void testNonTemporalDeltaWithFilterStagingTableWithMaxVersioningGreaterThanEqual // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(false) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(false) + .build()) + .build(); PlannerOptions options = 
PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -709,18 +727,19 @@ void testNonTemporalDeltaWithFilterStagingTableWithMaxVersioningGreaterThanWithD // Create staging table DatasetDefinition stagingTableForDB = TestUtils.getStagingTableWithFilterWithVersionForDB(); - createStagingTable(stagingTableForDB); + createStagingTableWithoutPks(stagingTableForDB); // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(true) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(true) + .build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -755,28 +774,46 @@ void testNonTemporalDeltaWithFilterStagingTableWithMaxVersioningGreaterThanWithD expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + + + // ------------ Perform incremental (delta) milestoning Pass3 Fail on Dups ------------------------ + // 0. 
Create new filter + datasets = Datasets.of(mainTable, TestUtils.getStagingTableWithFilterWithVersionSecondPass()); + String dataPass3 = basePath + "input/with_staging_filter/with_max_versioning/greater_than/with_dedup/data_pass3.csv"; + // 1. Load staging table + loadStagingDataWithFilterWithVersion(dataPass3); + try + { + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + Assertions.fail("Should not Succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } } @Test - void testNonTemporalDeltaWithFilterStagingTableWithMaxVersioningGreaterThanEqualToWithDedup() throws Exception + void testNonTemporalDeltaWithFilterStagingTableWithFilterDupsMaxVersioningGreaterThanEqualTo() throws Exception { DatasetDefinition mainTable = TestUtils.getBasicMainTableWithVersion(); DerivedDataset stagingTable = TestUtils.getDerivedStagingTableWithFilterWithVersion(); // Create staging table DatasetDefinition stagingTableForDB = TestUtils.getStagingTableWithFilterWithVersionForDB(); - createStagingTable(stagingTableForDB); + createStagingTableWithoutPks(stagingTableForDB); // Generate the milestoning object NontemporalDelta ingestMode = NontemporalDelta.builder() - .digestField(digestName) - .auditing(NoAuditing.builder().build()) - .versioningStrategy(MaxVersionStrategy.builder() - .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(true) - .build()) - .build(); + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(true) + .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); 
PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -790,13 +827,13 @@ void testNonTemporalDeltaWithFilterStagingTableWithMaxVersioningGreaterThanEqual loadStagingDataWithFilterWithVersion(dataPass1); // 2. Execute plans and verify results Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 4); expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); // 3. Assert that the staging table is NOT truncated List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); - Assertions.assertEquals(stagingTableList.size(), 6); + Assertions.assertEquals(stagingTableList.size(), 7); // ------------ Perform incremental (delta) milestoning Pass2 ------------------------ // 0. Create new filter @@ -807,9 +844,167 @@ void testNonTemporalDeltaWithFilterStagingTableWithMaxVersioningGreaterThanEqual loadStagingDataWithFilterWithVersion(dataPass2); // 2. 
Execute plans and verify results expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 10); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 12); expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); } + + @Test + void testNonTemporalDeltaWithAllVersionGreaterThanAndStagingFilters() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingDataset = DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersionAndBatch) + .build(); + + createStagingTableWithoutPks(stagingDataset); + DerivedDataset stagingTable = DerivedDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersion) + .addDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 1)) + .build(); + String path = "src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/greater_than/data1.csv"; + TestDedupAndVersioning.loadDataIntoStagingTableWithVersionAndBatch(path); + + // Generate the milestoning object + NontemporalDelta ingestMode = NontemporalDelta.builder() + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(true) + .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName, nameName, 
versionName, incomeName, expiryDateName, digestName}; + + // ------------ Perform incremental (delta) milestoning Pass1 ------------------------ + String expectedDataPass1 = basePath + "expected/with_staging_filter/with_all_version/greater_than/expected_pass1.csv"; + // 2. Execute plans and verify results + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = new HashMap<>(); + expectedStats1.put(StatisticName.INCOMING_RECORD_COUNT.name(), 4); + expectedStats1.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats1.put(StatisticName.ROWS_DELETED.name(), 0); + Map expectedStats2 = new HashMap<>(); + expectedStats2.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); + expectedStats2.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats2.put(StatisticName.ROWS_DELETED.name(), 0); + Map expectedStats3 = new HashMap<>(); + expectedStats3.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); + expectedStats3.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats3.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + expectedStatsList.add(expectedStats3); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + + // ------------ Perform incremental (delta) milestoning Pass2 Fail on Duplicates ------------------------ + ingestMode = ingestMode.withDeduplicationStrategy(FailOnDuplicates.builder().build()); + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 2)); + datasets = Datasets.of(mainTable, stagingTable); + try + { + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on 
Duplicates is set as Deduplication strategy", e.getMessage()); + } + + // ------------ Perform incremental (delta) milestoning Pass2 Filter Duplicates ------------------------ + String expectedDataPass2 = basePath + "expected/with_staging_filter/with_all_version/greater_than/expected_pass2.csv"; + expectedStatsList = new ArrayList<>(); + Map expectedStats4 = new HashMap<>(); + expectedStats4.put(StatisticName.INCOMING_RECORD_COUNT.name(), 4); + expectedStats4.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats4.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStatsList.add(expectedStats4); + + ingestMode = ingestMode.withDeduplicationStrategy(FilterDuplicates.builder().build()); + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 2)); + datasets = Datasets.of(mainTable, stagingTable); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); + } + + @Test + void testNonTemporalDeltaWithAllVersionDigestBasedAndStagingFilters() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingDataset = DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersionAndBatch) + .build(); + + createStagingTableWithoutPks(stagingDataset); + DerivedDataset stagingTable = DerivedDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersion) + .addDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 1)) + .build(); + String path = "src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/digest_based/data1.csv"; + TestDedupAndVersioning.loadDataIntoStagingTableWithVersionAndBatch(path); + + // Generate the milestoning object + NontemporalDelta ingestMode = NontemporalDelta.builder() + 
.digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName}; + + // ------------ Perform incremental (delta) milestoning Pass1 ------------------------ + String expectedDataPass1 = basePath + "expected/with_staging_filter/with_all_version/digest_based/expected_pass1.csv"; + // 2. Execute plans and verify results + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = new HashMap<>(); + expectedStats1.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats1.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats1.put(StatisticName.ROWS_DELETED.name(), 0); + Map expectedStats2 = new HashMap<>(); + expectedStats2.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); + expectedStats2.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats2.put(StatisticName.ROWS_DELETED.name(), 0); + Map expectedStats3 = new HashMap<>(); + expectedStats3.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); + expectedStats3.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats3.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + expectedStatsList.add(expectedStats3); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + + // ------------ Perform incremental (delta) milestoning Pass2 Filter Duplicates ------------------------ + String expectedDataPass2 = basePath + 
"expected/with_staging_filter/with_all_version/digest_based/expected_pass2.csv"; + expectedStatsList = new ArrayList<>(); + Map expectedStats4 = new HashMap<>(); + expectedStats4.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats4.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats4.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStatsList.add(expectedStats4); + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 2)); + datasets = Datasets.of(mainTable, stagingTable); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); + } + } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index 903e83bc355..9e1ce6ca59b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -20,25 +20,21 @@ import org.finos.legend.engine.persistence.components.ingestmode.NontemporalSnapshot; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import 
org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; -import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; +import org.finos.legend.engine.persistence.components.versioning.TestDedupAndVersioning; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.ArrayList; import java.util.List; import java.util.Map; -import static org.finos.legend.engine.persistence.components.TestUtils.batchUpdateTimeName; -import static org.finos.legend.engine.persistence.components.TestUtils.dataSplitName; -import static org.finos.legend.engine.persistence.components.TestUtils.digestName; -import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; -import static org.finos.legend.engine.persistence.components.TestUtils.idName; -import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; -import static org.finos.legend.engine.persistence.components.TestUtils.nameName; -import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; +import static org.finos.legend.engine.persistence.components.TestUtils.*; class NontemporalSnapshotTest extends BaseTest { @@ -52,7 +48,8 @@ class NontemporalSnapshotTest extends BaseTest 4. No Auditing & import external CSV dataset 5. 
Staging has lesser columns than main dataset 6. Staging data cleanup - 7. Data Splits enabled + 7. With Auditing, Max Version, Filter Duplicates + 8. With Auditing, No Version, Fail on Duplicates */ /* @@ -255,38 +252,106 @@ void testNontemporalSnapshotWithCleanStagingData() throws Exception } /* - Scenario: Test Nontemporal Snapshot when data splits are enabled + Scenario: Test Nontemporal Snapshot when MaxVersion and FilterDuplicates are enabled */ @Test - void testNontemporalSnapshotWithDataSplits() throws Exception + void testNontemporalSnapshotWithMaxVersionAndFilterDuplicates() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); - String dataPass1 = basePath + "input/with_data_splits/data_pass1.csv"; - Dataset stagingTable = TestUtils.getBasicCsvDatasetReferenceTableWithDataSplits(dataPass1); + DatasetDefinition stagingTable = TestDedupAndVersioning.getStagingTableWithVersion(); + + // Create staging table + TestDedupAndVersioning.createStagingTableWithVersion(); // Generate the milestoning object NontemporalSnapshot ingestMode = NontemporalSnapshot.builder() .auditing(NoAuditing.builder().build()) - .dataSplitField(dataSplitName) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); - PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName}; - // ------------ Perform incremental (append) milestoning Pass1 ------------------------ - String expectedDataPass1 = basePath + "expected/with_data_splits/expected_pass1.csv"; - // Execute 
plans and verify results - List dataSplitRanges = new ArrayList<>(); - dataSplitRanges.add(DataSplitRange.of(1, 1)); - dataSplitRanges.add(DataSplitRange.of(2, 2)); - dataSplitRanges.add(DataSplitRange.of(3, 3)); - - List> expectedStatsList = new ArrayList<>(); - Map expectedStats = createExpectedStatsMap(5, 0, 3, 0, 0); - expectedStatsList.add(expectedStats); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, dataSplitRanges); + // ------------ Perform snapshot milestoning Pass1 ------------------------ + String dataPass1 = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + String expectedDataPass1 = basePath + "expected/max_version_filter_duplicates/expected_pass1.csv"; + // 1. Load staging table + TestDedupAndVersioning.loadDataIntoStagingTableWithVersion(dataPass1); + // 2. Execute plans and verify results + + Map expectedStats = createExpectedStatsMap(6, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + + // ------------ Perform snapshot milestoning Pass2 ------------------------ + // Throw Data Error + String dataPass2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; + // 1. Load staging table + TestDedupAndVersioning.loadDataIntoStagingTableWithVersion(dataPass2); + // 2. 
Execute plans and verify results + try + { + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + } + } + + /* + Scenario: Test Nontemporal Snapshot when No Version and FailOnDuplicates + */ + @Test + void testNontemporalSnapshotWithFailOnDupsNoVersioning() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestDedupAndVersioning.getStagingTableWithoutVersion(); + + // Create staging table + TestDedupAndVersioning.createStagingTableWithoutVersion(); + + // Generate the milestoning object + NontemporalSnapshot ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName, nameName, incomeName, expiryDateName, digestName}; + + // ------------ Perform snapshot milestoning Pass1 ------------------------ + String dataPass1 = "src/test/resources/data/dedup-and-versioning/input/data5_without_dups.csv"; + String expectedDataPass1 = "src/test/resources/data/dedup-and-versioning/input/data5_without_dups.csv"; + // 1. Load staging table + TestDedupAndVersioning.loadDataIntoStagingTableWithoutVersion(dataPass1); + // 2. 
Execute plans and verify results + + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + + // ------------ Perform snapshot milestoning Pass2 ------------------------ + // Throw Data Error + String dataPass2 = "src/test/resources/data/dedup-and-versioning/input/data1_with_dups.csv"; + // 1. Load staging table + TestDedupAndVersioning.loadDataIntoStagingTableWithoutVersion(dataPass2); + // 2. Execute plans and verify results + try + { + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java index 8b3a608bfd3..59ff0c03089 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java @@ -145,8 +145,8 @@ public void testMultiTableIngestionSuccessCase() throws Exception verifyResults(2, 
datsetSchema2, expectedDataset2Path, "main2", result.get(1), expectedStats); // Pass 3: - dataset1Path = basePathForInput + "multi_table_ingestion/staging_dataset_pass3.csv"; - dataset2Path = basePathForInput + "multi_table_ingestion/staging_dataset_pass3.csv"; + dataset1Path = "src/test/resources/data/empty_file.csv"; + dataset2Path = "src/test/resources/data/empty_file.csv"; expectedDataset1Path = basePathForExpected + "multi_table_ingestion/expected_dataset1_pass3.csv"; expectedDataset2Path = basePathForExpected + "multi_table_ingestion/expected_dataset2_pass3.csv"; expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); @@ -171,7 +171,7 @@ private List ingestMultiTables(Executor executor, RelationalInge executor.begin(); for (Datasets datasets: allDatasets) { - IngestorResult result = ingestor.ingest(datasets); + IngestorResult result = ingestor.ingest(datasets).get(0); multiTableIngestionResult.add(result); } @@ -256,7 +256,7 @@ private List ingestMultiTablesWithBadQuery(Executor executor, Re executor.begin(); for (Datasets datasets: allDatasets) { - IngestorResult result = ingestor.ingest(datasets); + IngestorResult result = ingestor.ingest(datasets).get(0); multiTableIngestionResult.add(result); } @@ -301,7 +301,7 @@ private void loadStagingDataset2(String path) throws Exception public static void verifyResults(int batchId, String[] schema, String expectedDataPath, String tableName, IngestorResult result, Map expectedStats) throws IOException { Assertions.assertEquals(batchId, result.batchId().get()); - Assertions.assertEquals("2000-01-01 00:00:00", result.ingestionTimestampUTC()); + Assertions.assertEquals("2000-01-01 00:00:00.000000", result.ingestionTimestampUTC()); List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", tableName)); TestUtils.assertFileAndTableDataEquals(schema, expectedDataPath, tableData); Map actualStats = result.statisticByName(); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDbAndSchemaMissingTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDbAndSchemaMissingTest.java index 9e1d2079eb9..bb8cb4dbf4f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDbAndSchemaMissingTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaDbAndSchemaMissingTest.java @@ -114,7 +114,7 @@ void testMilestoning(DatasetDefinition mainTable, DatasetDefinition stagingTable executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "without_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "without_delete_ind/expected_pass3.csv"; // 1. 
Load staging table loadStagingData(dataPass3, stagingTableFullyQualifiedName); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java index cf8c5f9ebec..c5a6709584f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java @@ -19,10 +19,14 @@ import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.OptimizationFilter; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningComparator; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import 
org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionColumnBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionComparator; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; @@ -105,7 +109,7 @@ void testMilestoning() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "without_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "without_delete_ind/expected_pass3.csv"; // 1. Load staging table loadBasicStagingData(dataPass3); @@ -165,7 +169,7 @@ void testMilestoningWithDeleteIndicator() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_delete_ind/expected_pass3.csv"; // 1. 
Load staging table loadStagingDataWithDeleteInd(dataPass3); @@ -222,7 +226,7 @@ void testMilestoningWithOptimizationFilters() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_optimization_filter/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_optimization_filter/expected_pass3.csv"; // 1. Load staging table loadBasicStagingData(dataPass3); @@ -232,7 +236,7 @@ void testMilestoningWithOptimizationFilters() throws Exception } @Test - void testMilestoningWithMaxVersioningGreaterThan() throws Exception + void testMilestoningWithMaxVersionGreaterThanDoNotPerform() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DatasetDefinition stagingTable = TestUtils.getStagingTableWithVersion(); @@ -252,8 +256,8 @@ void testMilestoningWithMaxVersioningGreaterThan() throws Exception .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(false) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(false) .build()) .build(); @@ -282,7 +286,7 @@ void testMilestoningWithMaxVersioningGreaterThan() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + 
"with_max_versioning/greater_than/without_dedup/expected_pass3.csv"; // 1. Load staging table loadStagingDataWithVersion(dataPass3); @@ -292,7 +296,7 @@ void testMilestoningWithMaxVersioningGreaterThan() throws Exception } @Test - void testMilestoningWithMaxVersioningGreaterThanEqualTo() throws Exception + void testMilestoningWithMaxVersionGreaterThanEqualToDoNotPerform() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DatasetDefinition stagingTable = TestUtils.getStagingTableWithVersion(); @@ -312,8 +316,8 @@ void testMilestoningWithMaxVersioningGreaterThanEqualTo() throws Exception .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(false) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(false) .build()) .build(); @@ -342,7 +346,7 @@ void testMilestoningWithMaxVersioningGreaterThanEqualTo() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_max_versioning/greater_than_equal_to/without_dedup/expected_pass3.csv"; // 1. 
Load staging table loadStagingDataWithVersion(dataPass3); @@ -352,7 +356,7 @@ void testMilestoningWithMaxVersioningGreaterThanEqualTo() throws Exception } @Test - void testMilestoningWithMaxVersioningGreaterThanWithDedup() throws Exception + void testMilestoningWithFilterDuplicatesMaxVersioningGreaterThan() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DatasetDefinition stagingTable = TestUtils.getStagingTableWithVersion(); @@ -360,7 +364,7 @@ void testMilestoningWithMaxVersioningGreaterThanWithDedup() throws Exception String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, versionName, batchIdInName, batchIdOutName, batchTimeInName, batchTimeOutName}; // Create staging table - createStagingTable(stagingTable); + createStagingTableWithoutPks(stagingTable); UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestName) @@ -372,9 +376,10 @@ void testMilestoningWithMaxVersioningGreaterThanWithDedup() throws Exception .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(true) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(true) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -386,11 +391,11 @@ void testMilestoningWithMaxVersioningGreaterThanWithDedup() throws Exception // 1. Load staging table loadStagingDataWithVersion(dataPass1); // 2. Execute plans and verify results - Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + Map expectedStats = createExpectedStatsMap(6, 0, 3, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); // 3. 
Assert that the staging table is NOT truncated List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); - Assertions.assertEquals(stagingTableList.size(), 3); + Assertions.assertEquals(stagingTableList.size(), 6); // ------------ Perform Pass2 ------------------------ String dataPass2 = basePathForInput + "with_max_versioning/greater_than/with_dedup/staging_data_pass2.csv"; @@ -402,7 +407,7 @@ void testMilestoningWithMaxVersioningGreaterThanWithDedup() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_max_versioning/greater_than/with_dedup/expected_pass3.csv"; // 1. Load staging table loadStagingDataWithVersion(dataPass3); @@ -412,7 +417,7 @@ void testMilestoningWithMaxVersioningGreaterThanWithDedup() throws Exception } @Test - void testMilestoningWithMaxVersioningGreaterThanEqualToWithDedup() throws Exception + void testMilestoningWithFailOnDuplicatesMaxVersioningGreaterThanEqualTo() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DatasetDefinition stagingTable = TestUtils.getStagingTableWithVersion(); @@ -420,7 +425,7 @@ void testMilestoningWithMaxVersioningGreaterThanEqualToWithDedup() throws Except String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, versionName, batchIdInName, batchIdOutName, batchTimeInName, batchTimeOutName}; // Create staging table - createStagingTable(stagingTable); + createStagingTableWithoutPks(stagingTable); UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestName) @@ -432,9 +437,10 @@ void 
testMilestoningWithMaxVersioningGreaterThanEqualToWithDedup() throws Except .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(true) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(true) .build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -462,13 +468,28 @@ void testMilestoningWithMaxVersioningGreaterThanEqualToWithDedup() throws Except executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_max_versioning/greater_than_equal_to/with_dedup/expected_pass3.csv"; // 1. Load staging table loadStagingDataWithVersion(dataPass3); // 2. Execute plans and verify results expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); + + // ------------ Perform Pass4 (Fail on Dups) ------------------------- + String dataPass4 = basePathForInput + "with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass4.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass4); + // 2. 
Execute plans and verify results + try + { + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } } @Test @@ -505,7 +526,7 @@ void testMilestoningWithFilterStagingTable() throws Exception Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); IngestorResult result = executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); Assertions.assertEquals(Optional.of(1), result.batchId()); - Assertions.assertEquals("2000-01-01 00:00:00", result.ingestionTimestampUTC()); + Assertions.assertEquals("2000-01-01 00:00:00.000000", result.ingestionTimestampUTC()); // 3. Assert that the staging table is NOT truncated List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); @@ -522,10 +543,10 @@ void testMilestoningWithFilterStagingTable() throws Exception expectedStats = createExpectedStatsMap(3, 0, 1, 1, 0); result = executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); Assertions.assertEquals(Optional.of(2), result.batchId()); - Assertions.assertEquals("2000-01-01 00:00:00", result.ingestionTimestampUTC()); + Assertions.assertEquals("2000-01-01 00:00:00.000000", result.ingestionTimestampUTC()); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_staging_filter/with_no_versioning/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_staging_filter/with_no_versioning/expected_pass3.csv"; // 1. 
Load staging table loadStagingDataWithFilter(dataPass3); @@ -533,11 +554,11 @@ void testMilestoningWithFilterStagingTable() throws Exception expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); result = executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); Assertions.assertEquals(Optional.of(3), result.batchId()); - Assertions.assertEquals("2000-01-01 00:00:00", result.ingestionTimestampUTC()); + Assertions.assertEquals("2000-01-01 00:00:00.000000", result.ingestionTimestampUTC()); } @Test - void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThan() throws Exception + void testMilestoningWithFilterDupsMaxVersionGreaterThanWithStagingFilters() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DerivedDataset stagingTable = TestUtils.getDerivedStagingTableWithFilterWithVersion(); @@ -546,7 +567,7 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThan() throws // Create staging table DatasetDefinition stagingTableForDB = TestUtils.getStagingTableWithFilterWithVersionForDB(); - createStagingTable(stagingTableForDB); + createStagingTableWithoutPks(stagingTableForDB); UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestName) @@ -558,9 +579,10 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThan() throws .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(false) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(false) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -572,11 +594,11 @@ void 
testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThan() throws // 1. Load staging table loadStagingDataWithFilterWithVersion(dataPass1); // 2. Execute plans and verify results - Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + Map expectedStats = createExpectedStatsMap(6, 0, 3, 0, 0); executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); // 3. Assert that the staging table is NOT truncated List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); - Assertions.assertEquals(stagingTableList.size(), 6); + Assertions.assertEquals(stagingTableList.size(), 9); // ------------ Perform Pass2 ------------------------ // 0. Create new filter @@ -590,7 +612,7 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThan() throws executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than/without_dedup/expected_pass3.csv"; // 1. 
Load staging table loadStagingDataWithFilterWithVersion(dataPass3); @@ -600,7 +622,7 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThan() throws } @Test - void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanEqualTo() throws Exception + void testMilestoningWithFailOnDupsMaxVersionGreaterThanEqualToWithStagingFilters() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DerivedDataset stagingTable = TestUtils.getDerivedStagingTableWithFilterWithVersion(); @@ -609,7 +631,7 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanEqualTo() // Create staging table DatasetDefinition stagingTableForDB = TestUtils.getStagingTableWithFilterWithVersionForDB(); - createStagingTable(stagingTableForDB); + createStagingTableWithoutPks(stagingTableForDB); UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestName) @@ -621,9 +643,10 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanEqualTo() .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(false) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(false) .build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -653,17 +676,32 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanEqualTo() executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + 
"with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/expected_pass3.csv"; // 1. Load staging table loadStagingDataWithFilterWithVersion(dataPass3); // 2. Execute plans and verify results expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform Pass4 Fail on Dups ------------------------- + String dataPass4 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass4.csv"; + // 1. Load staging table + loadStagingDataWithFilterWithVersion(dataPass4); + // 2. Execute plans and verify results + try + { + executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } } @Test - void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanWithDedup() throws Exception + void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThan() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DerivedDataset stagingTable = TestUtils.getDerivedStagingTableWithFilterWithVersion(); @@ -684,8 +722,8 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanWithDedup( .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(true) + 
.mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(true) .build()) .build(); @@ -716,7 +754,7 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanWithDedup( executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than/with_dedup/expected_pass3.csv"; // 1. Load staging table loadStagingDataWithFilterWithVersion(dataPass3); @@ -726,7 +764,7 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanWithDedup( } @Test - void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanEqualToWithDedup() throws Exception + void testMilestoningWithFilterDupsMaxVersioningDigestBasedWithStagingFilters() throws Exception { DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); DerivedDataset stagingTable = TestUtils.getDerivedStagingTableWithFilterWithVersion(); @@ -747,17 +785,18 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanEqualToWit .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(true) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); Datasets datasets = Datasets.of(mainTable, stagingTable); // ------------ 
Perform Pass1 ------------------------ - String dataPass1 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass1.csv"; - String expectedDataPass1 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass1.csv"; + String dataPass1 = basePathForInput + "with_staging_filter/with_max_versioning/digest_based/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_staging_filter/with_max_versioning/digest_based/expected_pass1.csv"; // 1. Load staging table loadStagingDataWithFilterWithVersion(dataPass1); // 2. Execute plans and verify results @@ -770,17 +809,17 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanEqualToWit // ------------ Perform Pass2 ------------------------ // 0. Create new filter datasets = Datasets.of(mainTable, TestUtils.getStagingTableWithFilterWithVersionSecondPass()); - String dataPass2 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass2.csv"; - String expectedDataPass2 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass2.csv"; + String dataPass2 = basePathForInput + "with_staging_filter/with_max_versioning/digest_based/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "with_staging_filter/with_max_versioning/digest_based/expected_pass2.csv"; // 1. Load staging table loadStagingDataWithFilterWithVersion(dataPass2); // 2. 
Execute plans and verify results - expectedStats = createExpectedStatsMap(9, 0, 1, 3, 0); + expectedStats = createExpectedStatsMap(9, 0, 1, 2, 0); executePlansAndVerifyResultsWithStagingFilters(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv"; - String expectedDataPass3 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "with_staging_filter/with_max_versioning/digest_based/expected_pass3.csv"; // 1. Load staging table loadStagingDataWithFilterWithVersion(dataPass3); // 2. Execute plans and verify results @@ -809,8 +848,8 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanWithDedupW .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(true) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(true) .build()) .build(); @@ -838,7 +877,7 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanWithDedupW executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + 
"with_staging_filter/with_max_versioning/greater_than/with_dedup/expected_pass3.csv"; // 1. Load staging table loadStagingDataWithFilterWithVersionInUpperCase(dataPass3); @@ -868,8 +907,8 @@ void testMilestoningWithMaxVersioningFail() throws Exception .build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(nameName) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(false) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(false) .build()) .build(); @@ -934,7 +973,7 @@ void testMilestoningWithLessColumnsInStaging() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "less_columns_in_staging/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "less_columns_in_staging/expected_pass3.csv"; stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass3); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchIdTest.java index 1e3abfd633b..4cfbd6efbc8 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchIdTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchIdTest.java @@ -16,33 +16,34 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.TestUtils; +import org.finos.legend.engine.persistence.components.common.DatasetFilter; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FilterType; +import org.finos.legend.engine.persistence.components.common.StatisticName; +import org.finos.legend.engine.persistence.components.ingestmode.NontemporalDelta; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; +import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionColumnBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionComparator; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; +import org.finos.legend.engine.persistence.components.versioning.TestDedupAndVersioning; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.ArrayList; - -import static org.finos.legend.engine.persistence.components.TestUtils.batchIdInName; -import static org.finos.legend.engine.persistence.components.TestUtils.batchIdOutName; -import static org.finos.legend.engine.persistence.components.TestUtils.deleteIndicatorName; -import static org.finos.legend.engine.persistence.components.TestUtils.deleteIndicatorValues; -import static org.finos.legend.engine.persistence.components.TestUtils.digestName; -import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; -import static org.finos.legend.engine.persistence.components.TestUtils.idName; -import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; -import static org.finos.legend.engine.persistence.components.TestUtils.nameName; -import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; -import static org.finos.legend.engine.persistence.components.TestUtils.dataSplitName; +import java.util.*; + +import static org.finos.legend.engine.persistence.components.TestUtils.*; +import static org.finos.legend.engine.persistence.components.TestUtils.versionName; class UnitemporalDeltaWithBatchIdTest extends BaseTest { @@ -61,7 +62,7 @@ void testMilestoning() throws Exception String[] schema = new String[]{idName, nameName, 
incomeName, startTimeName, expiryDateName, digestName, batchIdInName, batchIdOutName}; // Create staging table - createStagingTable(stagingTable); + createStagingTableWithoutPks(stagingTable); UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestName) @@ -69,6 +70,7 @@ void testMilestoning() throws Exception .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -92,11 +94,11 @@ void testMilestoning() throws Exception // 1. Load staging table loadBasicStagingData(dataPass2); // 2. Execute plans and verify results - expectedStats = createExpectedStatsMap(3, 0, 1, 1, 0); + expectedStats = createExpectedStatsMap(6, 0, 1, 1, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "without_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "without_delete_ind/expected_pass3.csv"; // 1. Load staging table loadBasicStagingData(dataPass3); @@ -197,7 +199,7 @@ void testMilestoningWithDeleteIndicator() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_delete_ind/expected_pass3.csv"; // 1. 
Load staging table loadStagingDataWithDeleteInd(dataPass3); @@ -246,7 +248,7 @@ void testMilestoningWithLessColumnsInStaging() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "less_columns_in_staging/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "less_columns_in_staging/expected_pass3.csv"; stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass3); @@ -299,7 +301,7 @@ void testMilestoningWithDeleteIndicatorWithCleanStagingData() throws Exception } @Test - void testMilestoningWithDataSplits() throws Exception + void testMilestoningAllVersionWithoutPerform() throws Exception { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); String dataPass1 = basePathForInput + "with_data_splits/staging_data_pass1.csv"; @@ -309,7 +311,11 @@ void testMilestoningWithDataSplits() throws Exception UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestName) - .dataSplitField(dataSplitName) + .versioningStrategy(AllVersionsStrategy.builder() + .dataSplitFieldName(dataSplitName) + .versioningField(expiryDateName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false).build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) @@ -333,9 +339,10 @@ void testMilestoningWithDataSplits() throws Exception expectedStatsList.add(expectedStatsSplit1); expectedStatsList.add(expectedStatsSplit2); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, 
expectedStatsList, dataSplitRanges); // ------------ Perform milestoning Pass2 ------------------------ + ingestMode = ingestMode.withDeduplicationStrategy(FilterDuplicates.builder().build()); String dataPass2 = basePathForInput + "with_data_splits/staging_data_pass2.csv"; stagingTable = TestUtils.getBasicCsvDatasetReferenceTableWithDataSplits(dataPass2); String expectedDataPass2 = basePathForExpected + "with_data_splits/expected_pass2.csv"; @@ -350,16 +357,203 @@ void testMilestoningWithDataSplits() throws Exception expectedStatsList.add(createExpectedStatsMap(1, 0, 0, 1, 0)); expectedStatsList.add(createExpectedStatsMap(1, 0, 0, 1, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, Datasets.of(mainTable, stagingTable), schema, expectedDataPass2, expectedStatsList, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, Datasets.of(mainTable, stagingTable), schema, expectedDataPass2, expectedStatsList, dataSplitRanges); // ------------ Perform milestoning Pass3 - Empty batch ------------------------ - String dataPass3 = basePathForInput + "with_data_splits/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; stagingTable = TestUtils.getBasicCsvDatasetReferenceTableWithDataSplits(dataPass3); String expectedDataPass3 = basePathForExpected + "with_data_splits/expected_pass3.csv"; // Execute plans and verify results dataSplitRanges = new ArrayList<>(); expectedStatsList = new ArrayList<>(); expectedStatsList.add(createExpectedStatsMap(0, 0, 0, 0, 0)); - executePlansAndVerifyResultsWithDataSplits(ingestMode, options, Datasets.of(mainTable, stagingTable), schema, expectedDataPass3, expectedStatsList, dataSplitRanges); + executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, Datasets.of(mainTable, stagingTable), schema, expectedDataPass3, expectedStatsList, dataSplitRanges); + } + + @Test + void testUniTemporalDeltaWithAllVersionGreaterThanAndStagingFilters() 
throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingDataset = DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersionAndBatch) + .build(); + + createStagingTableWithoutPks(stagingDataset); + DerivedDataset stagingTable = DerivedDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersion) + .addDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 1)) + .build(); + String path = basePathForInput + "with_all_version/data1.csv"; + TestDedupAndVersioning.loadDataIntoStagingTableWithVersionAndBatch(path); + + // Generate the milestoning object + UnitemporalDelta ingestMode = UnitemporalDelta.builder() + .digestField(digestName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(true) + .build()) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName}; + + // ------------ Perform milestoning Pass1 ------------------------ + String expectedDataPass1 = basePathForExpected + "with_all_version/greater_than/expected_pass1.csv"; + // 2. 
Execute plans and verify results + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = createExpectedStatsMap(3,0,3,0,0); + Map expectedStats2 = createExpectedStatsMap(2,0,0,2,0); + Map expectedStats3 = createExpectedStatsMap(1,0,0,1,0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + expectedStatsList.add(expectedStats3); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + + // ------------ Perform milestoning Pass2 Fail on Duplicates ------------------------ + ingestMode = ingestMode.withDeduplicationStrategy(FailOnDuplicates.builder().build()); + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 2)); + datasets = Datasets.of(mainTable, stagingTable); + try + { + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } + + // ------------ Perform milestoning Pass2 Filter Duplicates ------------------------ + String expectedDataPass2 = basePathForExpected + "with_all_version/greater_than/expected_pass2.csv"; + expectedStatsList = new ArrayList<>(); + Map expectedStats4 = createExpectedStatsMap(4,0,1,0,0); + Map expectedStats5 = createExpectedStatsMap(2,0,0,2,0); + expectedStatsList.add(expectedStats4); + expectedStatsList.add(expectedStats5); + + ingestMode = ingestMode.withDeduplicationStrategy(FilterDuplicates.builder().build()); + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 2)); + datasets = Datasets.of(mainTable, stagingTable); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, 
schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); + + // ------------ Perform milestoning Pass3 Data Error ------------------------ + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 3)); + datasets = Datasets.of(mainTable, stagingTable); + + try + { + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + } + } + + @Test + void testUniTemporalDeltaWithAllVersionDigestBasedAndStagingFilters() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingDataset = DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersionAndBatch) + .build(); + + createStagingTableWithoutPks(stagingDataset); + DerivedDataset stagingTable = DerivedDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersion) + .addDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 1)) + .build(); + String path = basePathForInput + "with_all_version/data1.csv"; + TestDedupAndVersioning.loadDataIntoStagingTableWithVersionAndBatch(path); + + // Generate the milestoning object + UnitemporalDelta ingestMode = UnitemporalDelta.builder() + .digestField(digestName) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .build()) + .build(); + + PlannerOptions options = 
PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName}; + + // ------------ Perform milestoning Pass1 ------------------------ + String expectedDataPass1 = basePathForExpected + "with_all_version/digest_based/expected_pass1.csv"; + // 2. Execute plans and verify results + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = createExpectedStatsMap(3,0,3,0,0); + Map expectedStats2 = createExpectedStatsMap(2,0,0,2,0); + Map expectedStats3 = createExpectedStatsMap(1,0,0,1,0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + expectedStatsList.add(expectedStats3); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + + // ------------ Perform milestoning Pass2 Fail on Duplicates ------------------------ + ingestMode = ingestMode.withDeduplicationStrategy(FailOnDuplicates.builder().build()); + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 2)); + datasets = Datasets.of(mainTable, stagingTable); + try + { + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } + + // ------------ Perform milestoning Pass2 Filter Duplicates ------------------------ + String expectedDataPass2 = basePathForExpected + "with_all_version/digest_based/expected_pass2.csv"; + expectedStatsList = new ArrayList<>(); + Map expectedStats4 = createExpectedStatsMap(4,0,1,1,0); + Map expectedStats5 = 
createExpectedStatsMap(2,0,0,2,0); + expectedStatsList.add(expectedStats4); + expectedStatsList.add(expectedStats5); + + ingestMode = ingestMode.withDeduplicationStrategy(FilterDuplicates.builder().build()); + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 2)); + datasets = Datasets.of(mainTable, stagingTable); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); + + // ------------ Perform milestoning Pass3 Data Error ------------------------ + stagingTable = stagingTable.withDatasetFilters(DatasetFilter.of("batch", FilterType.EQUAL_TO, 3)); + datasets = Datasets.of(mainTable, stagingTable); + + try + { + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch", e.getMessage()); + } } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchTimeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchTimeTest.java index d1934283bfe..fcc3eec3c89 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchTimeTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaWithBatchTimeTest.java @@ -18,6 +18,7 @@ import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; @@ -58,7 +59,7 @@ void testMilestoning() throws Exception String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchTimeInName, batchTimeOutName}; // Create staging table - createStagingTable(stagingTable); + createStagingTableWithoutPks(stagingTable); UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestName) @@ -66,6 +67,7 @@ void testMilestoning() throws Exception .dateTimeInName(batchTimeInName) .dateTimeOutName(batchTimeOutName) .build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -93,13 +95,28 @@ void testMilestoning() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "without_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String 
expectedDataPass3 = basePathForExpected + "without_delete_ind/expected_pass3.csv"; // 1. Load staging table loadBasicStagingData(dataPass3); // 2. Execute plans and verify results expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_03); + + // ------------ Perform Pass4 Fail on Duplicates ------------------------- + String dataPass4 = basePathForInput + "without_delete_ind/staging_data_pass4.csv"; + // 1. Load staging table + loadBasicStagingData(dataPass4); + // 2. Execute plans and verify results + try + { + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_03); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); + } } /* @@ -150,7 +167,7 @@ void testMilestoningWithDeleteIndicator() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "with_delete_ind/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_delete_ind/expected_pass3.csv"; // 1. 
Load staging table loadStagingDataWithDeleteInd(dataPass3); @@ -197,7 +214,7 @@ void testMilestoningWithLessColumnsInStaging() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets.withStagingDataset(stagingTable), schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); // ------------ Perform Pass3 empty batch (No Impact) ------------------------- - String dataPass3 = basePathForInput + "less_columns_in_staging/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "less_columns_in_staging/expected_pass3.csv"; stagingTable = TestUtils.getCsvDatasetRefWithLessColumnsThanMain(dataPass3); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java index 92126d34a4a..8ab15d09f9f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java @@ -18,9 +18,12 @@ import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import 
org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.FailEmptyBatch; import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; @@ -69,8 +72,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception Datasets datasets = Datasets.of(mainTable, stagingTable); // ------------ Perform unitemporal snapshot milestoning Pass1 ------------------------ - String dataPass1 = basePathForInput + "without_partition/staging_data_pass1.csv"; - String expectedDataPass1 = basePathForExpected + "without_partition/expected_pass1.csv"; + String dataPass1 = basePathForInput + "without_partition/no_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "without_partition/no_version/expected_pass1.csv"; // 1. Load staging table loadBasicStagingData(dataPass1); // 2. 
Execute plans and verify results @@ -81,8 +84,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception Assertions.assertEquals(stagingTableList.size(), 3); // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ - String dataPass2 = basePathForInput + "without_partition/staging_data_pass2.csv"; - String expectedDataPass2 = basePathForExpected + "without_partition/expected_pass2.csv"; + String dataPass2 = basePathForInput + "without_partition/no_version/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "without_partition/no_version/expected_pass2.csv"; // 1. Load staging table loadBasicStagingData(dataPass2); // 2. Execute plans and verify results @@ -93,8 +96,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception options = options.withCleanupStagingData(true); - String dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; - String expectedDataPass3 = basePathForExpected + "without_partition/expected_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "without_partition/no_version/expected_pass3.csv"; // 1. Load Staging table loadBasicStagingData(dataPass3); // 2. Execute plans and verify results @@ -127,8 +130,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartitionWithCaseConversion() Datasets datasets = Datasets.of(mainTable, stagingTable); // ------------ Perform unitemporal snapshot milestoning Pass1 ------------------------ - String dataPass1 = basePathForInput + "without_partition/staging_data_pass1.csv"; - String expectedDataPass1 = basePathForExpected + "without_partition/expected_pass1.csv"; + String dataPass1 = basePathForInput + "without_partition/no_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "without_partition/no_version/expected_pass1.csv"; // 1. 
Load staging table loadBasicStagingDataInUpperCase(dataPass1); // 2. Execute plans and verify results @@ -139,8 +142,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartitionWithCaseConversion() Assertions.assertEquals(stagingTableList.size(), 3); // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ - String dataPass2 = basePathForInput + "without_partition/staging_data_pass2.csv"; - String expectedDataPass2 = basePathForExpected + "without_partition/expected_pass2.csv"; + String dataPass2 = basePathForInput + "without_partition/no_version/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "without_partition/no_version/expected_pass2.csv"; // 1. Load staging table loadBasicStagingDataInUpperCase(dataPass2); // 2. Execute plans and verify results @@ -161,8 +164,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartitionWithCaseConversion() options = options.withCleanupStagingData(true); - String dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; - String expectedDataPass3 = basePathForExpected + "without_partition/expected_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "without_partition/no_version/expected_pass3.csv"; // 1. Load Staging table loadBasicStagingDataInUpperCase(dataPass3); // 2. Execute plans and verify results @@ -191,8 +194,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartitionWithCaseConversion() options = options.withCleanupStagingData(true); - dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; - expectedDataPass3 = basePathForExpected + "without_partition/expected_pass2.csv"; + dataPass3 = "src/test/resources/data/empty_file.csv"; + expectedDataPass3 = basePathForExpected + "without_partition/no_version/expected_pass2.csv"; // 1. Load Staging table loadBasicStagingDataInUpperCase(dataPass3); // 2. 
Execute plans and verify results @@ -203,8 +206,8 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartitionWithCaseConversion() // ------------ Perform unitemporal snapshot milestoning Pass6 (Empty Batch) Empty Data Handling = Skip ------------------------ options = options.withCleanupStagingData(true); - dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; - expectedDataPass3 = basePathForExpected + "without_partition/expected_pass4.csv"; + dataPass3 = "src/test/resources/data/empty_file.csv"; + expectedDataPass3 = basePathForExpected + "without_partition/no_version/expected_pass4.csv"; // 1. Load Staging table loadBasicStagingDataInUpperCase(dataPass3); // 2. Execute plans and verify results @@ -244,8 +247,8 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception Datasets datasets = Datasets.of(mainTable, stagingTable); // ------------ Perform unitemporal snapshot milestoning Pass1 ------------------------ - String dataPass1 = basePathForInput + "with_partition/staging_data_pass1.csv"; - String expectedDataPass1 = basePathForExpected + "with_partition/expected_pass1.csv"; + String dataPass1 = basePathForInput + "with_partition/no_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_partition/no_version/expected_pass1.csv"; // 1. Load staging table loadStagingDataForWithPartition(dataPass1); // 2. 
Execute plans and verify results @@ -253,8 +256,8 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ - String dataPass2 = basePathForInput + "with_partition/staging_data_pass2.csv"; - String expectedDataPass2 = basePathForExpected + "with_partition/expected_pass2.csv"; + String dataPass2 = basePathForInput + "with_partition/no_version/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "with_partition/no_version/expected_pass2.csv"; // 1. Load staging table loadStagingDataForWithPartition(dataPass2); // 2. Execute plans and verify results @@ -265,8 +268,8 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception options = options.withCleanupStagingData(true); - String dataPass3 = basePathForInput + "with_partition/staging_data_pass3.csv"; - String expectedDataPass3 = basePathForExpected + "with_partition/expected_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "with_partition/no_version/expected_pass3.csv"; // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); // 2. 
Execute plans and verify results @@ -346,8 +349,8 @@ void testUnitemporalSnapshotMilestoningLogicWithPartitionWithCleanStagingDataWit Datasets datasets = Datasets.of(mainTable, stagingTable); // ------------ Perform unitemporal snapshot milestoning With Clean Staging Table ------------------------ - String dataPass1 = basePathForInput + "with_partition/staging_data_pass1.csv"; - String expectedDataPass1 = basePathForExpected + "with_partition/expected_pass1.csv"; + String dataPass1 = basePathForInput + "with_partition/no_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_partition/no_version/expected_pass1.csv"; // 1. Load staging table loadStagingDataForWithPartition(dataPass1); // 2. Execute plans and verify results @@ -384,8 +387,8 @@ void testUnitemporalSnapshotMilestoningLogicWithPartitionWithoutCleanStagingData Datasets datasets = Datasets.of(mainTable, stagingTable); // ------------ Perform unitemporal snapshot milestoning With Clean Staging Table ------------------------ - String dataPass1 = basePathForInput + "with_partition/staging_data_pass1.csv"; - String expectedDataPass1 = basePathForExpected + "with_partition/expected_pass1.csv"; + String dataPass1 = basePathForInput + "with_partition/no_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_partition/no_version/expected_pass1.csv"; // 1. Load staging table loadStagingDataForWithPartition(dataPass1); // 2. 
Execute plans and verify results @@ -395,4 +398,194 @@ void testUnitemporalSnapshotMilestoningLogicWithPartitionWithoutCleanStagingData List<Map<String, Object>> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); Assertions.assertEquals(stagingTableList.size(), 6); } + + /* + Scenario: Test milestoning logic with max version (digest-based resolver, stage versioning enabled), without partition and with no explicit deduplication strategy, when the staging table is pre-populated + */ + @Test + void testUnitemporalSnapshotMilestoningLogicMaxVersionWithoutPartitionAllowDuplicates() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNonPkVersion(); + + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, versionName, batchIdInName, batchIdOutName, batchTimeInName, batchTimeOutName}; + + // Create staging table + createStagingTableWithoutPks(stagingTable); + + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.builder().build()) + .performStageVersioning(true) + .build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + // ------------ Perform unitemporal snapshot milestoning Pass1 ------------------------ + String dataPass1 = basePathForInput + "without_partition/max_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "without_partition/max_version/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass1); + // 2.
Execute plans and verify results + Map<String, Object> expectedStats = createExpectedStatsMap(6, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + // 3. Assert that the staging table is NOT truncated + List<Map<String, Object>> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"staging\""); + Assertions.assertEquals(6, stagingTableList.size()); + + // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ + String dataPass2 = basePathForInput + "without_partition/max_version/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "without_partition/max_version/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithVersion(dataPass2); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(4, 0, 1, 2, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ + options = options.withCleanupStagingData(true); + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "without_partition/max_version/expected_pass3.csv"; + // 1. Load Staging table + loadStagingDataWithVersion(dataPass3); + // 2.
Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 4); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + } + + /* + Scenario: Test milestoning logic with max version, with partition on the date field and FilterDuplicates deduplication, when the staging table is pre-populated + */ + @Test + void testUnitemporalSnapshotMilestoningLogicMaxVersionWithPartitionFilterDuplicates() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getEntityPriceWithVersionStagingTable(); + + String[] schema = new String[]{dateName, entityName, priceName, volumeName, digestName, versionName, batchIdInName, batchIdOutName, batchTimeInName, batchTimeOutName}; + + // Create staging table + createStagingTableWithoutPks(stagingTable); + + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .addAllPartitionFields(Collections.singletonList(dateName)) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.builder().build()) + .performStageVersioning(true) + .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + // ------------ Perform unitemporal snapshot milestoning Pass1 ------------------------ + String dataPass1 = basePathForInput + "with_partition/max_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_partition/max_version/expected_pass1.csv"; + // 1.
Load staging table + loadStagingDataForWithPartitionWithVersion(dataPass1); + // 2. Execute plans and verify results + Map<String, Object> expectedStats = createExpectedStatsMap(9, 0, 6, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ + String dataPass2 = basePathForInput + "with_partition/max_version/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "with_partition/max_version/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataForWithPartitionWithVersion(dataPass2); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(4, 0, 1, 1, 1); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ + options = options.withCleanupStagingData(true); + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "with_partition/max_version/expected_pass3.csv"; + // 1. Load Staging table + loadStagingDataForWithPartitionWithVersion(dataPass3); + // 2.
Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + } + + /* + Scenario: Test milestoning logic with max version, with partition on the date field and FilterDuplicates deduplication, when the staging table is pre-populated with upper-case table and column names + */ + @Test + void testUnitemporalSnapshotMilestoningLogicMaxVersionWithPartitionFilterDuplicatesUpperCase() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getEntityPriceWithVersionStagingTable(); + + String[] schema = new String[]{dateName.toUpperCase(), entityName.toUpperCase(), priceName.toUpperCase(), volumeName.toUpperCase(), digestName.toUpperCase(), versionName.toUpperCase(), batchIdInName.toUpperCase(), batchIdOutName.toUpperCase(), batchTimeInName.toUpperCase(), batchTimeOutName.toUpperCase()}; + + // Create staging table + h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"DATE\" DATE NOT NULL,\"ENTITY\" VARCHAR NOT NULL,\"PRICE\" DECIMAL(20,2),\"VOLUME\" BIGINT,\"DIGEST\" VARCHAR,\"VERSION\" INTEGER)"); + + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .addAllPartitionFields(Collections.singletonList(dateName)) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.builder().build()) + .performStageVersioning(true) + .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + // ------------ Perform unitemporal
snapshot milestoning Pass1 ------------------------ + String dataPass1 = basePathForInput + "with_partition/max_version/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_partition/max_version/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataForWithPartitionWithVersionInUpperCase(dataPass1); + // 2. Execute plans and verify results + Map<String, Object> expectedStats = createExpectedStatsMap(9, 0, 6, 0, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ + String dataPass2 = basePathForInput + "with_partition/max_version/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "with_partition/max_version/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataForWithPartitionWithVersionInUpperCase(dataPass2); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(4, 0, 1, 1, 1); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ + options = options.withCleanupStagingData(true); + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "with_partition/max_version/expected_pass3.csv"; + // 1. Load Staging table + loadStagingDataForWithPartitionWithVersionInUpperCase(dataPass3); + // 2.
Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java index f29f4ed594c..56741c184ec 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java @@ -102,7 +102,7 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ - String dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "without_partition/expected_pass3.csv"; // 1.
Load Staging table loadBasicStagingData(dataPass3); @@ -159,7 +159,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ - String dataPass3 = basePathForInput + "with_partition/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_partition/expected_pass3.csv"; // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); @@ -218,7 +218,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartitionFilter() throws Excepti // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch - No Op) ------------------------ IngestMode ingestModeWithNoOpBatchHandling = ingestMode.withEmptyDatasetHandling(NoOp.builder().build()); - String dataPass3 = basePathForInput + "with_partition_filter/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_partition_filter/expected_pass2.csv"; // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); @@ -228,7 +228,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartitionFilter() throws Excepti // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch - Delete target Data) ------------------------ IngestMode ingestModeWithDeleteTargetData = ingestMode.withEmptyDatasetHandling(DeleteTargetData.builder().build()); - dataPass3 = basePathForInput + "with_partition_filter/staging_data_pass3.csv"; + dataPass3 = "src/test/resources/data/empty_file.csv"; expectedDataPass3 = basePathForExpected + "with_partition_filter/expected_pass3.csv"; // 1. 
Load Staging table loadStagingDataForWithPartition(dataPass3); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java index 02b22293a2a..fe429653740 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java @@ -100,7 +100,7 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ - String dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "without_partition/expected_pass3.csv"; // 1. 
Load Staging table loadBasicStagingData(dataPass3); @@ -118,7 +118,7 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception .emptyDatasetHandling(FailEmptyBatch.builder().build()) .build(); - dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; + dataPass3 = "src/test/resources/data/empty_file.csv"; expectedDataPass3 = basePathForExpected + "without_partition/expected_pass3.csv"; // 1. Load Staging table loadBasicStagingData(dataPass3); @@ -183,7 +183,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_02); // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ - String dataPass3 = basePathForInput + "with_partition/staging_data_pass3.csv"; + String dataPass3 = "src/test/resources/data/empty_file.csv"; String expectedDataPass3 = basePathForExpected + "with_partition/expected_pass3.csv"; // 1. 
Load Staging table loadStagingDataForWithPartition(dataPass3); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java index 8501e66e418..b59eb9776c2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java @@ -19,6 +19,7 @@ import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; +import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -36,23 +37,7 @@ import java.util.Map; import java.util.Set; -import static org.finos.legend.engine.persistence.components.TestUtils.assertTableColumnsEquals; -import static org.finos.legend.engine.persistence.components.TestUtils.assertUpdatedDataset; -import static 
org.finos.legend.engine.persistence.components.TestUtils.createDatasetWithUpdatedField; -import static org.finos.legend.engine.persistence.components.TestUtils.digestName; -import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; -import static org.finos.legend.engine.persistence.components.TestUtils.getColumnDataTypeFromTable; -import static org.finos.legend.engine.persistence.components.TestUtils.getColumnDataTypeLengthFromTable; -import static org.finos.legend.engine.persistence.components.TestUtils.getColumnDataTypeScaleFromTable; -import static org.finos.legend.engine.persistence.components.TestUtils.getIsColumnNullableFromTable; -import static org.finos.legend.engine.persistence.components.TestUtils.idName; -import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; -import static org.finos.legend.engine.persistence.components.TestUtils.mainTableName; -import static org.finos.legend.engine.persistence.components.TestUtils.name; -import static org.finos.legend.engine.persistence.components.TestUtils.nameName; -import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; -import static org.finos.legend.engine.persistence.components.TestUtils.testDatabaseName; -import static org.finos.legend.engine.persistence.components.TestUtils.testSchemaName; +import static org.finos.legend.engine.persistence.components.TestUtils.*; class SchemaEvolutionTest extends BaseTest { @@ -74,7 +59,7 @@ void testAddColumn() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -82,7 +67,7 @@ void testAddColumn() throws Exception 
schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.ADD_COLUMN); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform Pass1 (Schema Evolution) ------------------------ String dataPass1 = basePathForInput + "add_column_data_pass1.csv"; @@ -90,12 +75,8 @@ void testAddColumn() throws Exception // 1. Load staging table loadBasicStagingData(dataPass1); // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_01); // 3. Verify schema changes in database List> actualTableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); assertTableColumnsEquals(Arrays.asList(schema), actualTableData); @@ -110,12 +91,8 @@ void testAddColumn() throws Exception // 2. Load staging table loadBasicStagingData(dataPass2); // 3. 
Execute plans and verify results - expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet); + expectedStats = createExpectedStatsMap(1, 0, 1, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_02); } @Test @@ -134,7 +111,7 @@ void testDataTypeConversion() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -142,7 +119,7 @@ void testDataTypeConversion() throws Exception schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.DATA_TYPE_CONVERSION); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform Pass1 (Schema Evolution) ------------------------ String dataPass1 = basePathForInput + "data_type_conversion_data_pass1.csv"; @@ -150,12 +127,8 @@ void testDataTypeConversion() throws Exception // 1. Load staging table loadBasicStagingData(dataPass1); // 2. 
Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_01); // 3. Verify schema changes in database List> actualTableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); assertTableColumnsEquals(Arrays.asList(schema), actualTableData); @@ -171,18 +144,14 @@ void testDataTypeConversion() throws Exception // 2. Load staging table loadBasicStagingData(dataPass2); // 3. 
Execute plans and verify results - expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet); + expectedStats = createExpectedStatsMap(1,0,1,0,0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_03); } @Test void testDataTypeSizeChange() throws Exception { - DatasetDefinition mainTable = TestUtils.getBasicMainTable(); + DatasetDefinition mainTable = TestUtils.getMainTableWithBatchUpdateTimeField(); DatasetDefinition stagingTable = TestUtils.getSchemaEvolutionDataTypeSizeChangeStagingTable(); // Create staging table @@ -195,7 +164,7 @@ void testDataTypeSizeChange() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -203,7 +172,7 @@ void testDataTypeSizeChange() throws Exception schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.DATA_TYPE_SIZE_CHANGE); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform Pass1 (Schema Evolution) ------------------------ String dataPass1 = 
basePathForInput + "datatype_type_size_change_data_pass1.csv"; @@ -211,12 +180,8 @@ void testDataTypeSizeChange() throws Exception // 1. Load staging table loadStagingDataForIntIncome(dataPass1); // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + Map expectedStats = createExpectedStatsMap(3,0,3,0,0); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_01); // 3. Verify schema changes in database List> actualTableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); assertTableColumnsEquals(Arrays.asList(schema), actualTableData); @@ -234,18 +199,14 @@ void testDataTypeSizeChange() throws Exception // 2. Load staging table loadStagingDataForIntIncome(dataPass2); // 3. 
Execute plans and verify results - expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet); + expectedStats = createExpectedStatsMap(1,0,1,0,0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_03); } @Test void testColumnNullabilityChange() throws Exception { - DatasetDefinition mainTable = TestUtils.getBasicMainTable(); + DatasetDefinition mainTable = TestUtils.getMainTableWithBatchUpdateTimeField(); DatasetDefinition stagingTable = TestUtils.getSchemaEvolutionColumnNullabilityChangeStagingTable(); // Create staging table @@ -258,7 +219,7 @@ void testColumnNullabilityChange() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -266,7 +227,7 @@ void testColumnNullabilityChange() throws Exception schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.COLUMN_NULLABILITY_CHANGE); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform Pass1 (Schema Evolution) ------------------------ String 
dataPass1 = basePathForInput + "column_nullability_change_data_pass1.csv"; @@ -274,12 +235,8 @@ void testColumnNullabilityChange() throws Exception // 1. Load staging table loadBasicStagingData(dataPass1); // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + Map expectedStats = createExpectedStatsMap(3,0,3,0,0); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_01); // 3. Verify schema changes in database List> actualTableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); assertTableColumnsEquals(Arrays.asList(schema), actualTableData); @@ -295,12 +252,8 @@ void testColumnNullabilityChange() throws Exception // 2. Load staging table loadBasicStagingData(dataPass2); // 3. 
Execute plans and verify results - expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet); + expectedStats = createExpectedStatsMap(1,0,1,0,0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_03); } @Test @@ -319,7 +272,7 @@ void testDataTypeConversionAndColumnNullabilityChange() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -328,7 +281,7 @@ void testDataTypeConversionAndColumnNullabilityChange() throws Exception schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.COLUMN_NULLABILITY_CHANGE); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform Pass1 (Schema Evolution) ------------------------ String dataPass1 = basePathForInput + "data_type_conversion_and_column_nullability_change_data_pass1.csv"; @@ -336,12 +289,8 @@ void testDataTypeConversionAndColumnNullabilityChange() throws Exception // 1. Load staging table loadBasicStagingData(dataPass1); // 2. 
Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + Map expectedStats = createExpectedStatsMap(3,0,3,0,0); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_01); // 3. Verify schema changes in database List> actualTableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); assertTableColumnsEquals(Arrays.asList(schema), actualTableData); @@ -358,18 +307,14 @@ void testDataTypeConversionAndColumnNullabilityChange() throws Exception // 2. Load staging table loadBasicStagingData(dataPass2); // 3. 
Execute plans and verify results - expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet); + expectedStats = createExpectedStatsMap(1,0,1,0,0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_03); } @Test void testDataTypeConversionAndDataTypeSizeChange() throws Exception { - DatasetDefinition mainTable = TestUtils.getBasicMainTable(); + DatasetDefinition mainTable = TestUtils.getMainTableWithBatchUpdateTimeField(); DatasetDefinition stagingTable = TestUtils.getSchemaEvolutionDataTypeConversionAndDataTypeSizeChangeStagingTable(); // Create staging table @@ -382,7 +327,7 @@ void testDataTypeConversionAndDataTypeSizeChange() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -391,7 +336,7 @@ void testDataTypeConversionAndDataTypeSizeChange() throws Exception schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.DATA_TYPE_SIZE_CHANGE); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ 
Perform Pass1 (Schema Evolution) ------------------------ String dataPass1 = basePathForInput + "data_type_conversion_and_data_type_size_change_data_pass1.csv"; @@ -399,12 +344,8 @@ void testDataTypeConversionAndDataTypeSizeChange() throws Exception // 1. Load staging table loadStagingDataForDecimalIncome(dataPass1); // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + Map expectedStats = createExpectedStatsMap(3,0,3,0,0); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_01); // 3. Verify schema changes in database List> actualTableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); assertTableColumnsEquals(Arrays.asList(schema), actualTableData); @@ -422,18 +363,14 @@ void testDataTypeConversionAndDataTypeSizeChange() throws Exception // 2. Load staging table loadStagingDataForDecimalIncome(dataPass2); // 3. 
Execute plans and verify results - expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet); + expectedStats = createExpectedStatsMap(1,0,1,0,0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_03); } @Test void testMakeMainColumnNullable() throws Exception { - DatasetDefinition mainTable = TestUtils.getBasicMainTable(); + DatasetDefinition mainTable = TestUtils.getMainTableWithBatchUpdateTimeField(); DatasetDefinition stagingTable = TestUtils.getSchemaEvolutionMakeMainColumnNullableStagingTable(); // Create staging table @@ -445,7 +382,7 @@ void testMakeMainColumnNullable() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -453,7 +390,7 @@ void testMakeMainColumnNullable() throws Exception schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.COLUMN_NULLABILITY_CHANGE); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform Pass1 (Schema Evolution) ------------------------ String 
dataPass1 = basePathForInput + "make_main_column_nullable_data_pass1.csv"; @@ -461,12 +398,8 @@ void testMakeMainColumnNullable() throws Exception // 1. Load staging table loadStagingDataForWithoutName(dataPass1); // 2. Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + Map expectedStats = createExpectedStatsMap(3,0,3,0,0); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_01); // 3. Verify schema changes in database List> actualTableData = h2Sink.executeQuery("select * from \"TEST\".\"main\""); assertTableColumnsEquals(Arrays.asList(schema), actualTableData); @@ -482,12 +415,8 @@ void testMakeMainColumnNullable() throws Exception // 2. Load staging table loadStagingDataForWithoutName(dataPass2); // 3. 
Execute plans and verify results - expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet); + expectedStats = createExpectedStatsMap(1,0,1,0,0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_03); } @Test @@ -506,7 +435,7 @@ void testSchemaEvolutionFailPKTypeDifferent() throws Exception AppendOnly ingestMode = AppendOnly.builder() .digestField(digestName) .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).enableSchemaEvolution(true).build(); @@ -514,7 +443,7 @@ void testSchemaEvolutionFailPKTypeDifferent() throws Exception schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.DATA_TYPE_CONVERSION); Datasets datasets = Datasets.of(mainTable, stagingTable); - String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchUpdateTimeName}; // ------------ Perform Pass1 ------------------------ String dataPass1 = basePathForInput + "data_type_conversion_data_pass1.csv"; @@ -522,15 +451,11 @@ void testSchemaEvolutionFailPKTypeDifferent() throws Exception // 1. Load staging table loadBasicStagingData(dataPass1); // 2. 
Execute plans and verify results - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); - expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); - expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); - expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + Map expectedStats = createExpectedStatsMap(3,0,3,0,0); try { - IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet); + IngestorResult result = executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, schemaEvolutionCapabilitySet, fixedClock_2000_01_03); Assertions.fail("Exception was not thrown"); } catch (IncompatibleSchemaChangeException e) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java new file mode 100644 index 00000000000..984e5ab7277 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java @@ -0,0 +1,664 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.versioning; + +import org.finos.legend.engine.persistence.components.BaseTest; +import org.finos.legend.engine.persistence.components.TestUtils; +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.executor.Executor; +import org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; +import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; +import org.finos.legend.engine.persistence.components.ingestmode.NontemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.*; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import 
org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; +import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; +import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import static org.finos.legend.engine.persistence.components.TestUtils.*; +import static org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategyAbstract.DATA_SPLIT; +import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_STAGING_DATASET_BASE_NAME; + +public class TestDedupAndVersioning extends BaseTest +{ + + /* Scenarios: + 1. No Dedup, NoVersion -> No tempStagingTable + 2. No Dedup, MaxVersion do not perform versioning -> No tempStagingTable + 3. No Dedup, MaxVersion with perform versioning -> tempStagingTable with only MaxVersioned Data [throw Error on Data errors] + 4. No Dedup, AllVersion do not perform versioning -> No tempStagingTable + 5. No Dedup, AllVersion with perform versioning -> tempStagingTable with Data splits [throw Error on Data errors] + + 6. Filter Dups, NoVersion -> tempStagingTable with count column + 7. Filter Dups, MaxVersion do not perform versioning -> tempStagingTable with count column + 8. Filter Dups, MaxVersion with perform versioning -> tempStagingTable with count column and only max version [throw Error on Data errors] + 9. Filter Dups, AllVersion do not perform versioning -> tempStagingTable with count column + 10. 
Filter Dups, AllVersion with perform versioning -> tempStagingTable with count column and Data splits [throw Error on Data errors] + + 11.Fail on Dups, NoVersion -> tempStagingTable with count column [Throw error on dups] + 12.Fail on Dups, MaxVersion do not perform versioning -> tempStagingTable with count column [Throw error on dups] + 13.Fail on Dups, MaxVersion with perform versioning -> tempStagingTable with count column and only max version [Throw error on dups, throw Error on Data errors] + 14.Fail on Dups, AllVersion do not perform versioning -> tempStagingTable with count column [Throw error on dups] + 15.Fail on Dups, AllVersion with perform versioning -> tempStagingTable with count column and Data splits [Throw error on dups, throw Error on Data errors] + */ + + private static Field name = Field.builder().name(nameName).type(FieldType.of(DataType.VARCHAR, 64, null)).nullable(false).primaryKey(true).fieldAlias(nameName).build(); + + // Base Schema : PK : id, name + public static SchemaDefinition baseSchemaWithoutVersion = + SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(income) + .addFields(expiryDate) + .addFields(digest) + .build(); + + public static SchemaDefinition baseSchemaWithVersion = + SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(version) + .addFields(income) + .addFields(expiryDate) + .addFields(digest) + .build(); + + public static SchemaDefinition baseSchemaWithVersionAndBatch = + SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(version) + .addFields(income) + .addFields(expiryDate) + .addFields(digest) + .addFields(batch) + .build(); + + private static final String tempStagingTableName = stagingTableName + "_" + TEMP_STAGING_DATASET_BASE_NAME; + + String[] schemaWithCount = new String[]{idName, nameName, incomeName, expiryDateName, digestName, "legend_persistence_count"}; + String[] schemaWithVersion = new String[]{idName, nameName, versionName, incomeName, 
expiryDateName, digestName}; + String[] schemaWithVersionAndCount = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName, "legend_persistence_count"}; + String[] schemaWithVersionCountAndDataSplit = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName, "legend_persistence_count", DATA_SPLIT}; + + String[] schemaWithVersionAndDataSplit = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName, DATA_SPLIT}; + + + // Scenario 1 + @Test + void testNoDedupNoVersioning() + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithoutVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .build(); + + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + } + + // Scenario 2 + @Test + void testNoDedupMaxVersioningDoNotPerform() + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .build(); + + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + } + + // Scenario 3 + @Test + void testNoDedupMaxVersioning() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = 
Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .build(); + + createStagingTableWithVersion(); + String srcDataPath = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_max_versioning.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath); + + performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersion); + + // Data error scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + } + } + + // Scenario 4 + @Test + void testNoDedupAllVersioningDoNotPerform() + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = AppendOnly.builder() + .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) + .digestField("digest") + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .build(); + + performDedupAndVersioining(datasets, ingestMode); + // 
Validate tempTableExists + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); + } + + // Scenario 5 + @Test + void testNoDedupAllVersion() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = AppendOnly.builder() + .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) + .digestField("digest") + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(true).build()) + .build(); + + createStagingTableWithVersion(); + String srcDataPath = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_all_version.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath); + + performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndDataSplit); + + // Data error scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + } + } + + // Scenario 6 + @Test + void testFilterDupsNoVersioning() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithoutVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + 
IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); + createStagingTableWithoutVersion(); + String srcDataPath = "src/test/resources/data/dedup-and-versioning/input/data1_with_dups.csv"; + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data1_filter_dups_no_versioning.csv"; + loadDataIntoStagingTableWithoutVersion(srcDataPath); + + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + verifyResults(expectedDataPath, schemaWithCount); + } + + // Scenario 7 + @Test + void testFilterDupsMaxVersionDoNotPerform() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .build(); + + createStagingTableWithVersion(); + String srcDataPath = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath); + + performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndCount); + } + + + // Scenario 8 + @Test + void testFilterDupsMaxVersion() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode 
ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .build(); + + createStagingTableWithVersion(); + String srcDataPath = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_max_versioning.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath); + + performDedupAndVersioining(datasets, ingestMode); + verifyResults(expectedDataPath, schemaWithVersionAndCount); + + // Data error scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Data errors (same PK, same version but different data), hence failing the batch",e.getMessage()); + } + } + + // Scenario 9 + @Test + void testFilterDupsAllVersionDoNotPerform() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = AppendOnly.builder() + .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) + .digestField("digest") + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(false).build()) + .build(); + + createStagingTableWithVersion(); + String srcDataPath = 
"src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath); + + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + verifyResults(expectedDataPath, schemaWithVersionAndCount); + } + + // Scenario 10 + @Test + void testFilterDupsAllVersion() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = AppendOnly.builder() + .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) + .digestField("digest") + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(true).build()) + .build(); + + createStagingTableWithVersion(); + String srcDataPath = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_all_version.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath); + + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit); + + // Data error scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Data errors (same PK, same version but 
different data), hence failing the batch",e.getMessage()); + } + } + + // Scenario 11 + @Test + void testFailOnDupsNoVersioning() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithoutVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .build(); + createStagingTableWithoutVersion(); + String srcDataPath = "src/test/resources/data/dedup-and-versioning/input/data1_with_dups.csv"; + loadDataIntoStagingTableWithoutVersion(srcDataPath); + + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + } + } + + // Scenario 12 + @Test + void testFailOnDupsMaxVersionDoNotPerform() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .build(); + + // Happy scenario + createStagingTableWithVersion(); + String srcDataPath1 = "src/test/resources/data/dedup-and-versioning/input/data4_without_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath1); + + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; + 
performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + verifyResults(expectedDataPath, schemaWithVersionAndCount); + + + // Duplicates scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + } + } + + // Scenario 13 + @Test + void testFailOnDupsMaxVersion() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").performStageVersioning(true).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .build(); + + // Happy scenario + createStagingTableWithVersion(); + String srcDataPath1 = "src/test/resources/data/dedup-and-versioning/input/data4_without_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath1); + + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_max_versioin.csv"; + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + verifyResults(expectedDataPath, schemaWithVersionAndCount); + + + // Duplicates scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + 
performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + } + } + + + // Scenario 14 + @Test + void testFailOnDupsAllVersionDoNotPerform() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = AppendOnly.builder() + .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) + .digestField("digest") + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(false).build()) + .build(); + + // Happy scenario + createStagingTableWithVersion(); + String srcDataPath1 = "src/test/resources/data/dedup-and-versioning/input/data4_without_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath1); + + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + verifyResults(expectedDataPath, schemaWithVersionAndCount); + + + // Duplicates scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + } + } + + // Scenario 15 + @Test + void 
testFailOnDupsAllVersion() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = getStagingTableWithVersion(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IngestMode ingestMode = AppendOnly.builder() + .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) + .digestField("digest") + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(true).build()) + .build(); + + // Happy scenario + createStagingTableWithVersion(); + String srcDataPath1 = "src/test/resources/data/dedup-and-versioning/input/data4_without_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath1); + + String expectedDataPath = "src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv"; + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + verifyResults(expectedDataPath, schemaWithVersionCountAndDataSplit); + + + // Duplicates scenario, should throw error + String srcDataPath2 = "src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv"; + loadDataIntoStagingTableWithVersion(srcDataPath2); + try + { + performDedupAndVersioining(datasets, ingestMode); + Assertions.fail("Should not succeed"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy",e.getMessage()); + } + } + + + public static DatasetDefinition getStagingTableWithoutVersion() + { + return DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(baseSchemaWithoutVersion) + .build(); + } + + private Dataset getTempStagingDataset() + { + return DatasetReferenceImpl.builder() + .group(testSchemaName) + .name(tempStagingTableName) 
+ .build(); + } + + public static DatasetDefinition getStagingTableWithVersion() + { + return DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(baseSchemaWithVersion) + .build(); + } + + + public static void createStagingTableWithoutVersion() + { + String createSql = "CREATE TABLE IF NOT EXISTS \"TEST\".\"staging\"" + + "(\"id\" INTEGER NOT NULL," + + "\"name\" VARCHAR(64) NOT NULL," + + "\"income\" BIGINT," + + "\"expiry_date\" DATE," + + "\"digest\" VARCHAR)"; + h2Sink.executeStatement(createSql); + } + + public static void createStagingTableWithVersion() + { + String createSql = "CREATE TABLE IF NOT EXISTS \"TEST\".\"staging\"" + + "(\"id\" INTEGER NOT NULL," + + "\"name\" VARCHAR(64) NOT NULL," + + "\"version\" INTEGER," + + "\"income\" BIGINT," + + "\"expiry_date\" DATE," + + "\"digest\" VARCHAR)"; + h2Sink.executeStatement(createSql); + } + + private static void performDedupAndVersioining(Datasets datasets, IngestMode ingestMode) + { + RelationalIngestor ingestor = RelationalIngestor.builder() + .ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .build(); + + Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); + datasets = ingestor.create(datasets); + datasets = ingestor.dedupAndVersion(datasets); + } + + public static void loadDataIntoStagingTableWithoutVersion(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + + "INSERT INTO \"TEST\".\"staging\"(id, name, income ,expiry_date, digest) " + + "SELECT CONVERT( \"id\",INT ), \"name\", CONVERT( \"income\", BIGINT), CONVERT( \"expiry_date\", DATE), digest" + + " FROM CSVREAD( '" + path + "', 'id, name, income, expiry_date, digest', NULL )"; + h2Sink.executeStatement(loadSql); + } + + public static void loadDataIntoStagingTableWithVersion(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + + "INSERT INTO 
\"TEST\".\"staging\"(id, name, version, income ,expiry_date, digest) " + + "SELECT CONVERT( \"id\",INT ), \"name\", CONVERT( \"version\",INT ), CONVERT( \"income\", BIGINT), CONVERT( \"expiry_date\", DATE), digest" + + " FROM CSVREAD( '" + path + "', 'id, name, version, income, expiry_date, digest', NULL )"; + h2Sink.executeStatement(loadSql); + } + + public static void loadDataIntoStagingTableWithVersionAndBatch(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + + "INSERT INTO \"TEST\".\"staging\"(id, name, version, income ,expiry_date, digest, batch) " + + "SELECT CONVERT( \"id\",INT ), \"name\", CONVERT( \"version\",INT ), CONVERT( \"income\", BIGINT), CONVERT( \"expiry_date\", DATE), digest, CONVERT( \"batch\",INT )" + + " FROM CSVREAD( '" + path + "', 'id, name, version, income, expiry_date, digest, batch', NULL )"; + h2Sink.executeStatement(loadSql); + } + + private void verifyResults(String expectedDataPath, String [] schema) throws IOException + { + Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset())); + List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", tempStagingTableName)); + TestUtils.assertFileAndTableDataEquals(schema, expectedDataPath, tableData); + } + + +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass1.csv index bae437bf7b1..f699187454f 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass1.csv @@ -1,2 +1,2 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass2.csv index 36b63f8e77c..7e300c3a802 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass2.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass2.csv @@ -1,4 +1,4 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 -1001,229000,DIGEST3,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 +1001,229000,DIGEST3,1,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass3.csv index cacfb889f8c..e67bc9b93aa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass3.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass3.csv @@ -1,5 +1,5 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 -1001,229000,DIGEST3,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 +1001,229000,DIGEST3,1,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass4.csv index ca96c022fce..97a06baa08c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass4.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/expected_pass4.csv @@ -1,7 +1,7 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,3 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 -1001,229000,DIGEST3,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,3 -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,4,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,3 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 +1001,229000,DIGEST3,1,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,3 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,4,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv index bae437bf7b1..f699187454f 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv @@ -1,2 +1,2 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv index 36b63f8e77c..7e300c3a802 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv @@ -1,4 +1,4 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 -1001,229000,DIGEST3,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 +1001,229000,DIGEST3,1,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv index cacfb889f8c..e67bc9b93aa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv @@ -1,5 +1,5 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 -1001,229000,DIGEST3,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 +1001,229000,DIGEST3,1,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv index cacfb889f8c..e67bc9b93aa 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv @@ -1,5 +1,5 @@ -1001,225000,DIGEST1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 -1001,229000,DIGEST3,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 -1001,290000,DIGEST2,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 +1001,225000,DIGEST1,1,2022-01-11 00:00:00.0,2022-02-24 00:00:00.0,1,999999999 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,1,1 +1001,229000,DIGEST3,1,2022-05-08 00:00:00.0,9999-12-31 23:59:59.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,2022-05-08 00:00:00.0,2,2 +1001,290000,DIGEST2,1,2022-02-24 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass1.csv index 79a2fb132e7..bc679dc2d35 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass1.csv @@ -1,2 +1,2 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass2.csv index 6701330e84d..6e3151022e0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass2.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass2.csv @@ -1,4 +1,4 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass3.csv index 889b5acb9b5..19ff341087a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass3.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass3.csv @@ -1,5 +1,5 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 +39915188,122000,DIGEST4,2,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass4.csv index 6d5ced14d6f..f6baa74cc08 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass4.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass4.csv @@ -1,6 +1,6 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 -39915188,110000,DIGEST5,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 +39915188,122000,DIGEST4,2,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 +39915188,110000,DIGEST5,3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass5.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass5.csv index 694efaebdd0..1c119448841 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass5.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass5.csv @@ -1,7 +1,7 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,4 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 -39915188,110000,DIGEST5,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,5,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,4 +39915188,122000,DIGEST4,2,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 +39915188,110000,DIGEST5,3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 
00:00:00.0,5,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass6.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass6.csv deleted file mode 100644 index 07bc95699dc..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/expected_pass6.csv +++ /dev/null @@ -1,8 +0,0 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,4 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 -39915188,110000,DIGEST5,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,5,5 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,6,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv index 79a2fb132e7..bc679dc2d35 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass1.csv @@ -1,2 +1,2 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv index 6701330e84d..6e3151022e0 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass2.csv @@ -1,4 +1,4 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv index 889b5acb9b5..19ff341087a 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass3.csv @@ -1,5 +1,5 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 +39915188,122000,DIGEST4,2,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv index 6d5ced14d6f..f6baa74cc08 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass4.csv @@ -1,6 +1,6 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 -39915188,110000,DIGEST5,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 +39915188,122000,DIGEST4,2,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 +39915188,110000,DIGEST5,3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass5.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass5.csv index 6d5ced14d6f..f6baa74cc08 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass5.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass5.csv @@ -1,6 +1,6 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 -39915188,110000,DIGEST5,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 +39915188,1250000,DIGEST1,1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 +39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 +39915188,120000,DIGEST3,1,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 
+39915188,124000,DIGEST2,1,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 +39915188,122000,DIGEST4,2,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 +39915188,110000,DIGEST5,3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass6.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass6.csv deleted file mode 100644 index 6d5ced14d6f..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/expected_pass6.csv +++ /dev/null @@ -1,6 +0,0 @@ -39915188,1250000,DIGEST1,2022-04-30 00:00:00.0,2022-05-31 00:00:00.0,1,999999999 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,9999-12-31 23:59:59.0,1,1 -39915188,120000,DIGEST3,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,2,2 -39915188,124000,DIGEST2,2022-05-31 00:00:00.0,2022-09-30 00:00:00.0,2,999999999 -39915188,122000,DIGEST4,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,3,3 -39915188,110000,DIGEST5,2022-09-30 00:00:00.0,9999-12-31 23:59:59.0,4,999999999 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass1.csv index 8fb30d52548..cb737cd2666 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass1.csv @@ -1,2 +1,2 @@ -1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,5 -1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,5 +1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,1,5 +1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,1,5 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass2.csv index 
bc24ee76372..6963a2a181a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass2.csv @@ -1,2 +1,2 @@ -1001,2022-05-08 00:00:00.0,229000,DIGEST3,1,1 -1001,2022-05-08 00:00:00.0,123456,DIGEST4,0,2 \ No newline at end of file +1001,2022-05-08 00:00:00.0,229000,DIGEST3,1,1,1 +1001,2022-05-08 00:00:00.0,123456,DIGEST4,2,0,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass3.csv index 6323e4d32a2..0938248ca4e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass3.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_3_with_data_split/staging_data_pass3.csv @@ -1,2 +1,2 @@ -1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,70 -1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,70 \ No newline at end of file +1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,1,70 +1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,1,70 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv index 8fb30d52548..cb737cd2666 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv @@ -1,2 +1,2 @@ -1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,5 -1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,5 +1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,1,5 
+1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,1,5 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv index bc24ee76372..6963a2a181a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv @@ -1,2 +1,2 @@ -1001,2022-05-08 00:00:00.0,229000,DIGEST3,1,1 -1001,2022-05-08 00:00:00.0,123456,DIGEST4,0,2 \ No newline at end of file +1001,2022-05-08 00:00:00.0,229000,DIGEST3,1,1,1 +1001,2022-05-08 00:00:00.0,123456,DIGEST4,2,0,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv index 6323e4d32a2..0938248ca4e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/with_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv @@ -1,2 +1,2 @@ -1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,70 -1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,70 \ No newline at end of file +1001,2022-01-11 00:00:00.0,225000,DIGEST1,1,1,70 +1001,2022-02-24 00:00:00.0,290000,DIGEST2,1,1,70 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass1.csv index 4226bf7a05a..a7ef24a3b87 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass1.csv @@ -1,2 +1,2 @@ -39915188,2022-04-30 00:00:00.0,1250000,DIGEST1,1 -39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1 +39915188,2022-04-30 00:00:00.0,1250000,DIGEST1,1,1 +39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass2.csv index 123606a3c79..e2b4e07e182 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass2.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass2.csv @@ -1,3 +1,3 @@ -39915188,2022-09-30 00:00:00.0,120000,DIGEST3,1 -39915188,2022-09-30 00:00:00.0,122000,DIGEST4,2 -39915188,2022-09-30 00:00:00.0,110000,DIGEST5,99 +39915188,2022-09-30 00:00:00.0,120000,DIGEST3,1,1 +39915188,2022-09-30 00:00:00.0,122000,DIGEST4,2,2 +39915188,2022-09-30 00:00:00.0,110000,DIGEST5,3,99 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass3.csv index c7ed61a2f09..478a83237e0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_3_with_data_split/staging_data_pass3.csv @@ -1,2 +1 @@ -39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1 -39915188,2022-05-31 00:00:00.0,124000,DIGEST2,2 +39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1,1 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv index 4226bf7a05a..a7ef24a3b87 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass1.csv @@ -1,2 +1,2 @@ -39915188,2022-04-30 00:00:00.0,1250000,DIGEST1,1 -39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1 +39915188,2022-04-30 00:00:00.0,1250000,DIGEST1,1,1 +39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv index 123606a3c79..e2b4e07e182 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass2.csv @@ -1,3 +1,3 @@ -39915188,2022-09-30 00:00:00.0,120000,DIGEST3,1 -39915188,2022-09-30 00:00:00.0,122000,DIGEST4,2 -39915188,2022-09-30 00:00:00.0,110000,DIGEST5,99 +39915188,2022-09-30 00:00:00.0,120000,DIGEST3,1,1 +39915188,2022-09-30 00:00:00.0,122000,DIGEST4,2,2 +39915188,2022-09-30 00:00:00.0,110000,DIGEST5,3,99 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv index c7ed61a2f09..478a83237e0 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_5_with_data_split_filter_duplicates/staging_data_pass3.csv @@ -1,2 +1 @@ -39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1 -39915188,2022-05-31 00:00:00.0,124000,DIGEST2,2 +39915188,2022-05-31 00:00:00.0,124000,DIGEST2,1,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/less_columns_in_staging/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/less_columns_in_staging/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/less_columns_in_staging/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/with_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/with_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/with_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/without_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/without_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from_and_through/without_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-snapshot-milestoning/input/batch_id_based/with_partition/staging_data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-snapshot-milestoning/input/batch_id_based/with_partition/staging_data_pass4.csv deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-snapshot-milestoning/input/batch_id_based/without_partition/staging_data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-snapshot-milestoning/input/batch_id_based/without_partition/staging_data_pass4.csv deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data1_filter_dups_no_versioning.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data1_filter_dups_no_versioning.csv new file mode 100644 index 00000000000..f525044432d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data1_filter_dups_no_versioning.csv @@ -0,0 +1,3 @@ +1,Andy,1000,2012-01-01,digest1,3 +2,Becky,2000,2012-01-02,digest2,2 +3,Cathy,3000,2012-01-03,digest3,1 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_all_version.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_all_version.csv new file mode 100644 index 00000000000..1b6ac7f8c1b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_all_version.csv @@ -0,0 +1,6 @@ +1,Andy,1,1000,2012-01-01,digest1,1 +1,Andy,2,2000,2012-01-02,digest2,2 +1,Andy,3,3000,2012-01-03,digest3,3 +2,Becky,1,4000,2012-01-04,digest4,1 +2,Becky,1,4000,2012-01-04,digest4,1 +3,Cathy,1,5000,2012-01-05,digest5,1 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_max_versioning.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_max_versioning.csv new file mode 100644 index 00000000000..d49b849d319 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_allow_dups_max_versioning.csv @@ -0,0 +1,4 @@ +1,Andy,3,3000,2012-01-03,digest3 +2,Becky,1,4000,2012-01-04,digest4 +2,Becky,1,4000,2012-01-04,digest4 +3,Cathy,1,5000,2012-01-05,digest5 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_all_version.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_all_version.csv new file mode 100644 index 00000000000..aff93491809 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_all_version.csv @@ -0,0 +1,5 @@ +1,Andy,1,1000,2012-01-01,digest1,1,1 +1,Andy,2,2000,2012-01-02,digest2,1,2 +1,Andy,3,3000,2012-01-03,digest3,1,3 +2,Becky,1,4000,2012-01-04,digest4,2,1 +3,Cathy,1,5000,2012-01-05,digest5,1,1 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_max_versioning.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_max_versioning.csv new file mode 100644 index 00000000000..caebf714b95 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_max_versioning.csv @@ -0,0 +1,3 @@ +1,Andy,3,3000,2012-01-03,digest3,1 +2,Becky,1,4000,2012-01-04,digest4,2 +3,Cathy,1,5000,2012-01-05,digest5,1 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv new file mode 100644 index 00000000000..1531598979a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data2_filter_dups_no_versioning.csv @@ -0,0 +1,5 @@ +1,Andy,1,1000,2012-01-01,digest1,1 +1,Andy,2,2000,2012-01-02,digest2,1 +1,Andy,3,3000,2012-01-03,digest3,1 +2,Becky,1,4000,2012-01-04,digest4,2 +3,Cathy,1,5000,2012-01-05,digest5,1 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_all_version.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_all_version.csv new file mode 100644 index 00000000000..aa49707585d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_all_version.csv @@ -0,0 +1,5 @@ +1,Andy,1,1000,2012-01-01,digest1,1 +1,Andy,2,2000,2012-01-02,digest2,1 +1,Andy,3,3000,2012-01-03,digest3,1 +2,Becky,1,4000,2012-01-04,digest4,1 +3,Cathy,1,5000,2012-01-05,digest5,1 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_max_versioin.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_max_versioin.csv new file mode 100644 index 00000000000..db3e9b770d6 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_max_versioin.csv @@ -0,0 +1,3 @@ +1,Andy,3,3000,2012-01-03,digest3,1 +2,Becky,1,4000,2012-01-04,digest4,1 +3,Cathy,1,5000,2012-01-05,digest5,1 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv new file mode 100644 index 00000000000..dab62a825e0 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/expected_data4_fail_on_dups_no_versioning.csv @@ -0,0 +1,5 @@ +1,Andy,1,1000,2012-01-01,digest1,1,1 +1,Andy,2,2000,2012-01-02,digest2,1,2 +1,Andy,3,3000,2012-01-03,digest3,1,3 +2,Becky,1,4000,2012-01-04,digest4,1,1 +3,Cathy,1,5000,2012-01-05,digest5,1,1 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data1_with_dups.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data1_with_dups.csv new file mode 100644 index 00000000000..5d2b28c46a9 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data1_with_dups.csv @@ -0,0 +1,6 @@ +1,Andy,1000,2012-01-01,digest1 +1,Andy,1000,2012-01-01,digest1 +1,Andy,1000,2012-01-01,digest1 +2,Becky,2000,2012-01-02,digest2 +2,Becky,2000,2012-01-02,digest2 +3,Cathy,3000,2012-01-03,digest3 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv new file mode 100644 index 00000000000..24b93e93ed7 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data2_with_dups_no_data_error.csv @@ -0,0 +1,6 @@ +1,Andy,1,1000,2012-01-01,digest1 +1,Andy,2,2000,2012-01-02,digest2 +1,Andy,3,3000,2012-01-03,digest3 +2,Becky,1,4000,2012-01-04,digest4 +2,Becky,1,4000,2012-01-04,digest4 +3,Cathy,1,5000,2012-01-05,digest5 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv new file mode 100644 index 00000000000..d2402f02f2e --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data3_with_dups_and_data_error.csv @@ -0,0 +1,7 @@ +1,Andy,1,1000,2012-01-01,digest1 +1,Andy,2,2000,2012-01-02,digest2 +1,Andy,3,3000,2012-01-03,digest3 +2,Becky,1,4000,2012-01-04,digest4 +2,Becky,1,4000,2012-01-04,digest4 +3,Cathy,1,5000,2012-01-05,digest5 +3,Cathy,1,6000,2012-01-06,digest6 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data4_without_dups_no_data_error.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data4_without_dups_no_data_error.csv new file mode 100644 index 00000000000..9f68e1a3650 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data4_without_dups_no_data_error.csv @@ -0,0 +1,5 @@ +1,Andy,1,1000,2012-01-01,digest1 +1,Andy,2,2000,2012-01-02,digest2 +1,Andy,3,3000,2012-01-03,digest3 +2,Becky,1,4000,2012-01-04,digest4 +3,Cathy,1,5000,2012-01-05,digest5 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data5_without_dups.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data5_without_dups.csv new file mode 100644 index 00000000000..06a96f0cbc1 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/input/data5_without_dups.csv @@ -0,0 +1,3 @@ +1,Andy,1000,2012-01-01,digest1 +2,Becky,2000,2012-01-02,digest2 +3,Cathy,3000,2012-01-03,digest3 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-snapshot-milestoning/input/batch_id_based/has_from_time_only/staging_data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/empty_file.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-snapshot-milestoning/input/batch_id_based/has_from_time_only/staging_data_pass4.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/empty_file.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/allow_duplicates/expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/allow_duplicates/expected_pass1.csv deleted file mode 100644 index a4b3d191440..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/allow_duplicates/expected_pass1.csv +++ /dev/null @@ -1,3 +0,0 @@ -HARRY,1000,2020-01-01 -ROBERT,2000,2020-01-02 -ANDY,3000,2020-01-03 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/allow_duplicates/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/allow_duplicates/expected_pass2.csv deleted file mode 100644 index aff27dff686..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/allow_duplicates/expected_pass2.csv +++ /dev/null @@ -1,6 +0,0 @@ -HARRY,1000,2020-01-01 -ROBERT,2000,2020-01-02 -ANDY,3000,2020-01-03 -HARRY,1000,2020-01-01 -ROBERT,2000,2020-01-02 -ANDY,3000,2020-01-03 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_filter_existing/expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_filter_existing/expected_pass1.csv new file mode 100644 index 00000000000..e780c270f99 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_filter_existing/expected_pass1.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:02.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:02.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,2000-01-01 00:00:02.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:03.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_filter_existing/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_filter_existing/expected_pass2.csv new file mode 100644 index 00000000000..b7f4ef8b6d5 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_filter_existing/expected_pass2.csv @@ -0,0 +1,6 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:02.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:02.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,2000-01-01 
00:00:02.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:03.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2,2000-01-01 00:00:06.0 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1,2000-01-01 00:00:06.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass1.csv new file mode 100644 index 00000000000..e780c270f99 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass1.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:02.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:02.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,2000-01-01 00:00:02.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:03.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass2.csv new file mode 100644 index 
00000000000..1827545ff1d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_all_version_filter_dup_no_filter_existing/expected_pass2.csv @@ -0,0 +1,7 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:02.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:02.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,2000-01-01 00:00:02.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:03.0 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:06.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2,2000-01-01 00:00:06.0 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1,2000-01-01 00:00:06.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_filter_existing/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_filter_existing/expected_pass1.csv new file mode 100644 index 00000000000..8227e12357a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_filter_existing/expected_pass1.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:00.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:00.0 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_filter_existing/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_filter_existing/expected_pass2.csv new file mode 100644 index 00000000000..1f794ed0800 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_filter_existing/expected_pass2.csv @@ -0,0 +1,5 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:00.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:00.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2,2000-01-02 00:00:00.123456 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1,2000-01-02 00:00:00.123456 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass1.csv new file mode 100644 index 00000000000..8227e12357a --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass1.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:00.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass2.csv new file mode 100644 index 00000000000..35c1fd7f597 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_max_version_filter_dup_no_filter_existing/expected_pass2.csv @@ -0,0 +1,6 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2000-01-01 00:00:00.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2,2000-01-01 00:00:00.0 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2000-01-02 00:00:00.123456 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2,2000-01-02 00:00:00.123456 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1,2000-01-02 00:00:00.123456 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_update_timestamp_field/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_no_version_filter_dup_filter_existing/expected_pass1.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_update_timestamp_field/expected_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_no_version_filter_dup_filter_existing/expected_pass1.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_no_version_filter_dup_filter_existing/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_no_version_filter_dup_filter_existing/expected_pass2.csv new file mode 100644 index 00000000000..c7b0757ee0f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/auditing_no_version_filter_dup_filter_existing/expected_pass2.csv @@ -0,0 +1,5 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 
00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.123456 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4,2000-01-02 00:00:00.123456 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/import_with_populate_digest/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/import_with_populate_digest/expected_pass1.csv new file mode 100644 index 00000000000..305d57e103e --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/import_with_populate_digest/expected_pass1.csv @@ -0,0 +1,5 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,ec557ebad89621a74ee47c6520bf7b74,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,45c518d26f7530c57290c3f609042d58,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,e030e08bd1d885dd79ff22ff40e77917,2000-01-01 00:00:00.0 +4,MICHEL,4000,2020-01-04 00:00:00.0,2022-12-04,9aa7c323d0ef36f5319cb0e1b3b5fb79,2000-01-01 00:00:00.0 +5,LIZA,5000,2020-01-05 00:00:00.0,2022-12-05,1169c3b3ca193e3b91d1af481d6030a7,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/import_with_populate_digest/expected_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/import_with_populate_digest/expected_pass2.csv new file mode 100644 index 00000000000..baac02321a8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/import_with_populate_digest/expected_pass2.csv @@ -0,0 +1,6 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,ec557ebad89621a74ee47c6520bf7b74,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,45c518d26f7530c57290c3f609042d58,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,e030e08bd1d885dd79ff22ff40e77917,2000-01-01 00:00:00.0 +4,MICHEL,4000,2020-01-04 00:00:00.0,2022-12-04,9aa7c323d0ef36f5319cb0e1b3b5fb79,2000-01-01 00:00:00.0 +5,LIZA,5000,2020-01-05 00:00:00.0,2022-12-05,1169c3b3ca193e3b91d1af481d6030a7,2000-01-01 00:00:00.0 +6,BRAD,6000,2020-01-06 00:00:00.0,2022-12-06,c3c51a5f44766686c1ca456de687821c,2000-01-02 00:00:00.123456 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass1.csv index 0cd30c5ef8e..8a7e93d21a6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass1.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,null,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3 +1,HARRY,1000,2020-01-01 00:00:00.0,null,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass2.csv index b873fc394be..261724f1d36 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/less_columns_in_staging/expected_pass2.csv @@ -1,5 +1,6 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,null,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3 -3,ANDY,3100,2020-01-06 00:00:00.0,null,DIGEST3_UPDATED -4,MATT,4000,2020-01-04 00:00:00.0,null,DIGEST4 \ No newline at end of file +1,HARRY,1000,2020-01-01 00:00:00.0,null,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 
00:00:00.0,null,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2,2000-01-02 00:00:00.123456 +3,ANDY,3100,2020-01-06 00:00:00.0,null,DIGEST3_UPDATED,2000-01-02 00:00:00.123456 +4,MATT,4000,2020-01-04 00:00:00.0,null,DIGEST4,2000-01-02 00:00:00.123456 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass1.csv index 9d7b8b2d1b7..179b54e57d8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +HARRY,1000,2022-12-01 +ROBERT,2000,2022-12-02 +ANDY,3000,2022-12-03 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass2.csv index 4d3a4826e15..2c6727854a3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/vanilla_case/expected_pass2.csv @@ -1,5 +1,6 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -3,ANDY,3100,2020-01-06 00:00:00.0,2022-12-03,DIGEST3_UPDATED -4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 \ No newline at end of file +HARRY,1000,2022-12-01 +ROBERT,2000,2022-12-02 +ANDY,3000,2022-12-03 +ROBERT,2000,2022-12-02 +ANDY,3100,2022-12-03 +MATT,4000,2022-12-06 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_data_splits/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_data_splits/expected_pass1.csv deleted file mode 100644 index ae342aa1142..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_data_splits/expected_pass1.csv +++ /dev/null @@ -1,5 +0,0 @@ -2000-01-01 
00:00:02.0,1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2000-01-01 00:00:02.0,2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -2000-01-01 00:00:02.0,3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -2000-01-01 00:00:03.0,1,HARRISON,11000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4 -2000-01-01 00:00:03.0,2,ROBERTO,21000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/allow_duplicates/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/allow_duplicates/data_pass1.csv deleted file mode 100644 index 351fdf33161..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/allow_duplicates/data_pass1.csv +++ /dev/null @@ -1,3 +0,0 @@ -HARRY,1000,2020-01-01 -ROBERT,2000,2020-01-02 -ANDY,3000,2020-01-03 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/allow_duplicates/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/allow_duplicates/data_pass2.csv deleted file mode 100644 index a4b3d191440..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/allow_duplicates/data_pass2.csv +++ /dev/null @@ -1,3 +0,0 @@ 
-HARRY,1000,2020-01-01 -ROBERT,2000,2020-01-02 -ANDY,3000,2020-01-03 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_data_splits/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_filter_existing/data_pass1.csv similarity index 58% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_data_splits/data_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_filter_existing/data_pass1.csv index 26ff40ce596..35120edb7c4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_data_splits/data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_filter_existing/data_pass1.csv @@ -1,5 +1,4 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1 -1,HARRISON,11000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2 -2,ROBERTO,21000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,2 \ No newline at end of file +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_filter_existing/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_filter_existing/data_pass2.csv new file mode 100644 index 00000000000..9a2eb4ce32c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_filter_existing/data_pass2.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_data_splits/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_no_filter_existing/data_pass1.csv similarity index 58% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_data_splits/data_pass1.csv rename to 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_no_filter_existing/data_pass1.csv index 26ff40ce596..35120edb7c4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_data_splits/data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_no_filter_existing/data_pass1.csv @@ -1,5 +1,4 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1 -1,HARRISON,11000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2 -2,ROBERTO,21000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,2 \ No newline at end of file +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_no_filter_existing/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_no_filter_existing/data_pass2.csv new file mode 100644 index 00000000000..9a2eb4ce32c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_all_version_filter_dup_no_filter_existing/data_pass2.csv @@ -0,0 +1,4 @@ 
+1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_filter_existing/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_filter_existing/data_pass1.csv new file mode 100644 index 00000000000..35120edb7c4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_filter_existing/data_pass1.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_filter_existing/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_filter_existing/data_pass2.csv new file mode 100644 index 00000000000..9a2eb4ce32c --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_filter_existing/data_pass2.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_no_filter_existing/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_no_filter_existing/data_pass1.csv new file mode 100644 index 00000000000..35120edb7c4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_no_filter_existing/data_pass1.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,2 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_no_filter_existing/data_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_no_filter_existing/data_pass2.csv new file mode 100644 index 00000000000..9a2eb4ce32c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_max_version_filter_dup_no_filter_existing/data_pass2.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST5,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_update_timestamp_field/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_no_version_filter_dup_filter_existing/data_pass1.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_update_timestamp_field/data_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_no_version_filter_dup_filter_existing/data_pass1.csv diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_no_version_filter_dup_filter_existing/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_no_version_filter_dup_filter_existing/data_pass2.csv new file mode 100644 index 00000000000..cbfdfc4dd21 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/auditing_no_version_filter_dup_filter_existing/data_pass2.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/import_with_populate_digest/data_pass1.json b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/import_with_populate_digest/data_pass1.json new file mode 100644 index 00000000000..d32d0bf5fd2 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/import_with_populate_digest/data_pass1.json @@ -0,0 +1,37 @@ +[ + { + "id": 1, + "name": "HARRY", + "income": 1000, + "start_time": "2020-01-01 00:00:00.0", + "expiry_date": "2022-12-01" + }, + { + "id": 2, + "name": 
"ROBERT", + "income": 2000, + "start_time": "2020-01-02 00:00:00.0", + "expiry_date": "2022-12-02" + }, + { + "id": 3, + "name": "ANDY", + "income": 3000, + "start_time": "2020-01-03 00:00:00.0", + "expiry_date": "2022-12-03" + }, + { + "id": 4, + "name": "MICHEL", + "income": 4000, + "start_time": "2020-01-04 00:00:00.0", + "expiry_date": "2022-12-04" + }, + { + "id": 5, + "name": "LIZA", + "income": 5000, + "start_time": "2020-01-05 00:00:00.0", + "expiry_date": "2022-12-05" + } +] \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/import_with_populate_digest/data_pass2.json b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/import_with_populate_digest/data_pass2.json new file mode 100644 index 00000000000..4e8e3c3c81a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/import_with_populate_digest/data_pass2.json @@ -0,0 +1,16 @@ +[ + { + "id": 1, + "name": "HARRY", + "income": 1000, + "start_time": "2020-01-01 00:00:00.0", + "expiry_date": "2022-12-01" + }, + { + "id": 6, + "name": "BRAD", + "income": 6000, + "start_time": "2020-01-06 00:00:00.0", + "expiry_date": "2022-12-06" + } +] \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass1.csv index 9d7b8b2d1b7..179b54e57d8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +HARRY,1000,2022-12-01 +ROBERT,2000,2022-12-02 +ANDY,3000,2022-12-03 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass2.csv index d7d8ccdcc2b..7bf2b920ce4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/vanilla_case/data_pass2.csv @@ -1,3 +1,3 @@ -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3100,2020-01-06 00:00:00.0,2022-12-03,DIGEST3_UPDATED 
-4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 \ No newline at end of file +ROBERT,2000,2022-12-02 +ANDY,3100,2022-12-03 +MATT,4000,2022-12-06 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/digest_based/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/digest_based/expected_pass1.csv new file mode 100644 index 00000000000..bc189099ff2 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/digest_based/expected_pass1.csv @@ -0,0 +1,3 @@ +1,Andy,3,13000,2012-01-03,digest13 +2,Becky,1,21000,2012-02-01,digest21 +3,Cathy,1,31000,2012-03-01,digest31 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/digest_based/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/digest_based/expected_pass2.csv new file mode 100644 index 00000000000..b6e0765827d --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/digest_based/expected_pass2.csv @@ -0,0 +1,4 @@ +1,Andy,2,12000,2012-01-02,digest12 +2,Becky,2,22000,2012-02-02,digest22 +3,Cathy,1,31000,2012-03-01,digest31 +4,Dexter,1,41000,2012-04-01,digest41 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/greater_than/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/greater_than/expected_pass1.csv new file mode 100644 index 00000000000..bc189099ff2 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/greater_than/expected_pass1.csv @@ -0,0 +1,3 @@ +1,Andy,3,13000,2012-01-03,digest13 +2,Becky,1,21000,2012-02-01,digest21 +3,Cathy,1,31000,2012-03-01,digest31 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/greater_than/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/greater_than/expected_pass2.csv new file mode 100644 
index 00000000000..a3af00b3312 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/expected/with_staging_filter/with_all_version/greater_than/expected_pass2.csv @@ -0,0 +1,4 @@ +1,Andy,3,13000,2012-01-03,digest13 +2,Becky,2,22000,2012-02-02,digest22 +3,Cathy,1,31000,2012-03-01,digest31 +4,Dexter,1,41000,2012-04-01,digest41 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_duplicates/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_duplicates/data_pass1.csv new file mode 100644 index 00000000000..08f5d7a15ac --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_duplicates/data_pass1.csv @@ -0,0 +1,2 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/digest_based/data1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/digest_based/data1.csv new file mode 100644 index 00000000000..97a953361ef --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/digest_based/data1.csv @@ -0,0 +1,8 @@ +1,Andy,1,11000,2012-01-01,digest11,1 +1,Andy,2,12000,2012-01-02,digest12,1 +1,Andy,3,13000,2012-01-03,digest13,1 +2,Becky,1,21000,2012-02-01,digest21,1 +3,Cathy,1,31000,2012-03-01,digest31,1 +1,Andy,2,12000,2012-01-02,digest12,2 +2,Becky,2,22000,2012-02-02,digest22,2 +4,Dexter,1,41000,2012-04-01,digest41,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/greater_than/data1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/greater_than/data1.csv new file mode 100644 index 00000000000..64eae01aec9 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/greater_than/data1.csv @@ -0,0 +1,10 @@ +1,Andy,1,11000,2012-01-01,digest11,1 +1,Andy,2,12000,2012-01-02,digest12,1 +1,Andy,3,13000,2012-01-03,digest13,1 +2,Becky,1,21000,2012-02-01,digest21,1 +2,Becky,1,21000,2012-02-01,digest21,1 +3,Cathy,1,31000,2012-03-01,digest31,1 +1,Andy,2,12000,2012-01-02,digest12,2 +1,Andy,2,12000,2012-01-02,digest12,2 +2,Becky,2,22000,2012-02-02,digest22,2 +4,Dexter,1,41000,2012-04-01,digest41,2 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than/with_dedup/data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than/with_dedup/data_pass3.csv new file mode 100644 index 00000000000..fdf9e3fea09 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than/with_dedup/data_pass3.csv @@ -0,0 +1,2 @@ +4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST47,7,7 +4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST47,7,7 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass1.csv index 84ec7cae780..37ccb3a509c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass1.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass1.csv @@ -4,3 +4,4 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,2 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2,2 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,5,2 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,5,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass2.csv index 561a4e6f029..8748e2b4465 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/data_pass2.csv @@ -12,5 +12,7 @@ 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST23,3,3 3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-03,DIGEST31,1,3 3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-03,DIGEST33,5,4 +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-03,DIGEST33,5,4 4,MATT,4000,2020-01-04 
00:00:00.0,2022-12-06,DIGEST41,1,5 +4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST47,7,6 4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST47,7,6 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_update_timestamp_field/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_update_timestamp_field/data_pass1.csv index 9d7b8b2d1b7..10814030657 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_update_timestamp_field/data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-delta-milestoning/input/with_update_timestamp_field/data_pass1.csv @@ -1,3 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass1.csv index 9d7b8b2d1b7..e4921df1a11 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass2.csv index 76b44d94f72..5ab3edf0db3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/add_column_expected_pass2.csv @@ -1,4 +1,4 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 
00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.123456 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass1.csv index 4e9e555440a..4d09b0772f5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass2.csv index 0e48a170f07..526c680fd9c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/column_nullability_change_expected_pass2.csv @@ -1,4 +1,4 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -4,null,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 \ No newline at end of file +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +4,null,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4,2000-01-03 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass1.csv index c4c8a8fb29e..cbdc6309ace 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,null,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,null,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass2.csv index 03a81404ca3..aa2c7d68969 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_column_nullability_change_expected_pass2.csv @@ -1,4 +1,4 @@ -1,HARRY,1000,2020-01-01 
00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,null,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -4,MATT,null,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,null,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +4,MATT,null,2020-01-04 00:00:00.0,2022-12-06,DIGEST4,2000-01-03 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass1.csv index caf2ced1aa5..9c90abbf159 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000.00,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000.00,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000.00,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +1,HARRY,1000.00,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000.00,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000.00,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass2.csv index 457f71bc1f4..6676b07163f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_and_data_type_size_change_expected_pass2.csv @@ -1,4 +1,4 @@ -1,HARRY,1000.00,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000.00,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000.00,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -4,MATT,4000.00,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 +1,HARRY,1000.00,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000.00,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000.00,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +4,MATT,4000.00,2020-01-04 00:00:00.0,2022-12-06,DIGEST4,2000-01-03 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass1.csv index 9d7b8b2d1b7..e4921df1a11 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass2.csv index 76b44d94f72..f4da0dda441 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass2.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/data_type_conversion_expected_pass2.csv @@ -1,4 +1,4 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4,2000-01-03 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass1.csv index 72cc5edbebc..f0ce511cc02 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass1.csv @@ -1,3 +1,3 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 \ No newline at end of file +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 
+2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass2.csv index 61b3755ed1f..1d3e0db847b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/datatype_type_size_change_expected_pass2.csv @@ -1,4 +1,4 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 \ No newline at end of file +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +4,MATT,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4,2000-01-03 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass1.csv index 4ea8ae5aa6f..390bc643cdf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass1.csv @@ -1,3 +1,3 @@ -1,null,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,null,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +1,null,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,null,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass2.csv index 282156272f6..49917de2ef8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass2.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/schema-evolution/expected/make_main_column_nullable_expected_pass2.csv @@ -1,4 +1,4 @@ -1,null,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 -2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 -3,null,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -4,null,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4 +1,null,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0 +2,null,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0 +3,null,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0 +4,null,4000,2020-01-04 00:00:00.0,2022-12-06,DIGEST4,2000-01-03 00:00:00.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/max_version_filter_duplicates/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/max_version_filter_duplicates/expected_pass1.csv new file mode 100644 index 00000000000..c8909eb5c9f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/max_version_filter_duplicates/expected_pass1.csv @@ -0,0 +1,3 @@ +1,Andy,3,3000,2012-01-03,digest3 +2,Becky,1,4000,2012-01-04,digest4 +3,Cathy,1,5000,2012-01-05,digest5 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_data_splits/expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_data_splits/expected_pass1.csv deleted file mode 100644 index 296d0d6dd8c..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_data_splits/expected_pass1.csv +++ /dev/null @@ -1,3 +0,0 @@ -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 -1,HARRISON,11000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4 -2,ROBERTO,21000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass1.csv similarity index 80% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass1.csv index 7237e360400..f1b14a6a58e 
100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass1.csv @@ -1,3 +1,3 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,5,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,5,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass2.csv similarity index 62% rename from 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass2.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass2.csv index 9166cca23da..a81247cc2d3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass2.csv @@ -1,7 +1,6 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,5,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,1,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,5,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,5,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST10,100,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST11,5,2,999999999,2000-01-01 
00:00:00.0,9999-12-31 23:59:59.0 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST14,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST15,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass3.csv similarity index 62% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass3.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass3.csv index 9166cca23da..a81247cc2d3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/expected_pass3.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/expected_pass3.csv @@ -1,7 +1,6 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,5,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,1,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,5,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,5,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST10,100,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST11,5,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST14,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST15,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/digest_based/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/digest_based/expected_pass1.csv new file mode 100644 index 00000000000..8b835df00f3 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/digest_based/expected_pass1.csv @@ -0,0 +1,6 @@ +1,Andy,1,11000,2012-01-01,digest11,1,1,1 +2,Becky,1,21000,2012-02-01,digest21,1,1,1 +3,Cathy,1,31000,2012-03-01,digest31,1,1,999999999 +1,Andy,2,12000,2012-01-02,digest12,1,2,2 +2,Becky,2,22000,2012-02-02,digest22,1,2,999999999 +1,Andy,3,13000,2012-01-03,digest13,1,3,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/digest_based/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/digest_based/expected_pass2.csv new file mode 100644 index 00000000000..39406f84aad --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/digest_based/expected_pass2.csv @@ -0,0 +1,10 @@ +1,Andy,1,11000,2012-01-01,digest11,1,1,1 +2,Becky,1,21000,2012-02-01,digest21,1,1,1 +3,Cathy,1,31000,2012-03-01,digest31,1,1,999999999 +1,Andy,2,12000,2012-01-02,digest12,1,2,2 +2,Becky,2,22000,2012-02-02,digest22,1,2,4 +1,Andy,3,13000,2012-01-03,digest13,1,3,3 +1,Andy,2,12000,2012-01-02,digest12,2,4,999999999 +4,Dexter,1,41000,2012-04-01,digest41,2,4,4 +2,Becky,3,23000,2012-02-03,digest23,2,5,999999999 +4,Dexter,2,42000,2012-04-02,digest42,2,5,999999999 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/greater_than/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/greater_than/expected_pass1.csv new file mode 100644 index 00000000000..8b835df00f3 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/greater_than/expected_pass1.csv @@ -0,0 +1,6 @@ +1,Andy,1,11000,2012-01-01,digest11,1,1,1 +2,Becky,1,21000,2012-02-01,digest21,1,1,1 +3,Cathy,1,31000,2012-03-01,digest31,1,1,999999999 +1,Andy,2,12000,2012-01-02,digest12,1,2,2 +2,Becky,2,22000,2012-02-02,digest22,1,2,999999999 +1,Andy,3,13000,2012-01-03,digest13,1,3,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/greater_than/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/greater_than/expected_pass2.csv new file mode 100644 index 00000000000..e88eb108eae --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/batch_id_based/with_all_version/greater_than/expected_pass2.csv @@ -0,0 
+1,9 @@ +1,Andy,1,11000,2012-01-01,digest11,1,1,1 +2,Becky,1,21000,2012-02-01,digest21,1,1,1 +3,Cathy,1,31000,2012-03-01,digest31,1,1,999999999 +1,Andy,2,12000,2012-01-02,digest12,1,2,2 +2,Becky,2,22000,2012-02-02,digest22,1,2,4 +1,Andy,3,13000,2012-01-03,digest13,1,3,999999999 +4,Dexter,1,41000,2012-04-01,digest41,2,4,4 +2,Becky,3,23000,2012-02-03,digest23,2,5,999999999 +4,Dexter,2,42000,2012-04-02,digest42,2,5,999999999 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv index 015c2721e1c..6baa1086207 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv @@ -1,5 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,null,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,4000,2020-01-02 00:00:00.0,null,DIGEST2_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 
-4,MATT,6000,2020-01-06 00:00:00.0,null,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file +2,ROBERT,4000,2020-01-02 00:00:00.0,null,DIGEST2_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,null,DIGEST4,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass3.csv index 015c2721e1c..6baa1086207 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/less_columns_in_staging/expected_pass3.csv @@ -1,5 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,null,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,4000,2020-01-02 00:00:00.0,null,DIGEST2_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -4,MATT,6000,2020-01-06 00:00:00.0,null,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at 
end of file +2,ROBERT,4000,2020-01-02 00:00:00.0,null,DIGEST2_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,null,DIGEST4,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass2.csv index 917080e73b3..7966d95c27e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass2.csv @@ -1,6 +1,6 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -4,SAM,7000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 
-5,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST5,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file +4,SAM,7000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +5,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST5,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass3.csv index 917080e73b3..7966d95c27e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/with_delete_ind/expected_pass3.csv @@ -1,6 +1,6 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 
23:59:59.0 -4,SAM,7000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -5,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST5,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file +4,SAM,7000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +5,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST5,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass2.csv index 340cea0a74a..767bd85e86c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass2.csv @@ -1,5 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 
00:00:00.0,2000-01-02 00:00:00.123456 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass3.csv index 340cea0a74a..767bd85e86c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/expected/time_based/without_delete_ind/expected_pass3.csv @@ -1,5 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 3,ANDY,3000,2020-01-03 
00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/less_columns_in_staging/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/less_columns_in_staging/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/less_columns_in_staging/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/multi_table_ingestion/staging_dataset_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/multi_table_ingestion/staging_dataset_pass3.csv deleted file mode 100644 
index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/multi_table_ingestion/staging_dataset_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/with_dedup/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/with_dedup/staging_data_pass1.csv index 6751bc6c308..0eed793beae 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/with_dedup/staging_data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/with_dedup/staging_data_pass1.csv @@ -1,3 +1,6 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,5 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,5 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass4.csv new file mode 100644 index 00000000000..8eae7de4a66 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass4.csv @@ -0,0 +1,2 @@ +4,MATT,7000,2020-01-07 00:00:00.0,2022-12-07,DIGEST5,1 +4,MATT,7000,2020-01-07 00:00:00.0,2022-12-07,DIGEST5,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_optimization_filter/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_optimization_filter/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_optimization_filter/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/staging_data_pass1.csv similarity index 83% rename from 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/staging_data_pass1.csv index 2deae477a95..cf34912d6a6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/staging_data_pass1.csv @@ -3,4 +3,4 @@ 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,1 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2,2 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,5,2 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,1,2 \ No newline at end of file +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,5,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/staging_data_pass2.csv similarity index 79% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass2.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/staging_data_pass2.csv index bd59e145f6a..819b81a7600 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/digest_based/staging_data_pass2.csv @@ -8,8 +8,8 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST8,2,3 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST9,3,3 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST10,100,3 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST11,5,3 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST12,4,3 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST13,3,3 +2,ROBERT,2000,2020-01-02 
00:00:00.0,2022-12-02,DIGEST5,5,3 +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST4,4,3 +2,ROBERT,1000,2020-01-02 00:00:00.0,2022-12-02,DIGEST3,3,3 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST14,1,3 4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST15,1,3 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/with_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass1.csv index 4aaf96d9b8e..81428abcfa3 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass1.csv @@ -2,5 +2,8 @@ 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,1 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,1 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2,2 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2,2 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST4,2,2 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,1,2 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST5,1,2 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,1,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than/without_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/with_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass4.csv new file mode 100644 index 00000000000..25ba12fe9c4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_max_versioning/greater_than_equal_to/without_dedup/staging_data_pass4.csv @@ -0,0 +1,2 @@ +4,MATT,7000,2020-01-07 00:00:00.0,2022-12-07,DIGEST8,1,4 +4,MATT,7000,2020-01-07 00:00:00.0,2022-12-07,DIGEST8,1,4 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_no_versioning/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_no_versioning/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/with_staging_filter/with_no_versioning/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/without_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/without_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/without_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/less_columns_in_staging/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/less_columns_in_staging/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/less_columns_in_staging/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_all_version/data1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_all_version/data1.csv new file mode 100644 index 00000000000..4802bef9db8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_all_version/data1.csv @@ -0,0 +1,14 @@ +1,Andy,1,11000,2012-01-01,digest11,1 +1,Andy,2,12000,2012-01-02,digest12,1 +1,Andy,3,13000,2012-01-03,digest13,1 +2,Becky,1,21000,2012-02-01,digest21,1 +2,Becky,2,22000,2012-02-02,digest22,1 +3,Cathy,1,31000,2012-03-01,digest31,1 +1,Andy,2,12000,2012-01-02,digest12,2 +1,Andy,2,12000,2012-01-02,digest12,2 +2,Becky,2,22000,2012-02-02,digest22,2 
+2,Becky,3,23000,2012-02-03,digest23,2 +4,Dexter,1,41000,2012-04-01,digest41,2 +4,Dexter,2,42000,2012-04-02,digest42,2 +5,Elena,1,51000,2012-05-01,digest51,3 +5,Elena,1,52000,2012-05-02,digest52,3 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_data_splits/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_data_splits/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_data_splits/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/with_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/without_delete_ind/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/without_delete_ind/staging_data_pass2.csv index 0d58c6909b0..1f269393f64 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/without_delete_ind/staging_data_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/without_delete_ind/staging_data_pass2.csv @@ -1,3 +1,6 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED 2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED 4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/without_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/without_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_based/without_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/less_columns_in_staging/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/less_columns_in_staging/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/less_columns_in_staging/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/with_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/with_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/with_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/without_delete_ind/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/without_delete_ind/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/without_delete_ind/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/without_delete_ind/staging_data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/without_delete_ind/staging_data_pass4.csv new file mode 100644 index 00000000000..a53cf448f1e --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-incremental-milestoning/input/time_based/without_delete_ind/staging_data_pass4.csv @@ -0,0 +1,2 @@ +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass1.csv new file mode 100644 index 00000000000..8d6910e9f83 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass1.csv @@ -0,0 +1,6 @@ +2021-12-01,GS,383.82,2476002,DIGEST3_UPDATED2,3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,IBM,116.92,5958300,DIGEST1,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,12253400,DIGEST2,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,37800.00,3343700,DIGEST6,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,IBM,117.37,5267100,DIGEST4,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPMX,159.83,12969901,DIGEST5_UPDATED,2,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass2.csv 
new file mode 100644 index 00000000000..a51cfd15e9b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass2.csv @@ -0,0 +1,8 @@ +2021-12-01,GS,383.82,2476002,DIGEST3_UPDATED2,3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,IBM,116.92,5958300,DIGEST1,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,12253400,DIGEST2,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,37800.00,3343700,DIGEST6,1,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,IBM,117.37,5267100,DIGEST4,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPMX,159.83,12969901,DIGEST5_UPDATED,2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,GS,378.00,3343700,DIGEST8,2,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPM,159.83,12969900,DIGEST7,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass3.csv new file mode 100644 index 00000000000..a51cfd15e9b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/max_version/expected_pass3.csv @@ -0,0 +1,8 
@@ +2021-12-01,GS,383.82,2476002,DIGEST3_UPDATED2,3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,IBM,116.92,5958300,DIGEST1,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,12253400,DIGEST2,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,37800.00,3343700,DIGEST6,1,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,IBM,117.37,5267100,DIGEST4,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPMX,159.83,12969901,DIGEST5_UPDATED,2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,GS,378.00,3343700,DIGEST8,2,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPM,159.83,12969900,DIGEST7,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/no_version/expected_pass1.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/no_version/expected_pass1.csv diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/no_version/expected_pass2.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass2.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/no_version/expected_pass2.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/no_version/expected_pass3.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass3.csv rename to 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/no_version/expected_pass3.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass1.csv new file mode 100644 index 00000000000..86afb1ffcf3 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass1.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST3,2,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +3,ANDY,3002,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass2.csv new file mode 100644 index 00000000000..b3d51119d7c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass2.csv @@ -0,0 +1,6 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST3,2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +3,ANDY,3002,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,3,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST7,4,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST8,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass3.csv new file mode 100644 index 00000000000..053bdcfcfe9 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/max_version/expected_pass3.csv @@ -0,0 +1,6 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,1,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST3,2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +3,ANDY,3002,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,3,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,2,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST7,4,2,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST8,1,2,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass1.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass1.csv rename to 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass1.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass2.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass2.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass2.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass3.csv similarity index 100% rename from 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass3.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass3.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass4.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass4.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/no_version/expected_pass4.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv index 52ff36d9cec..5c99d4ae109 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/less_columns_in_staging/expected_pass2.csv @@ -1,5 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,null,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2,ROBERT,2000,2020-01-02 00:00:00.0,null,DIGEST2,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -3,ANDY,3100,2020-01-03 00:00:00.0,null,DIGEST3_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -4,MATT,6000,2020-01-06 00:00:00.0,null,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 +3,ANDY,3000,2020-01-03 00:00:00.0,null,DIGEST3,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +3,ANDY,3100,2020-01-03 00:00:00.0,null,DIGEST3_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,null,DIGEST4,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass2.csv index e7232a18a95..2fe02593d27 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass2.csv @@ -2,7 +2,7 @@ 2021-12-01,JPM,161.00,12253400,DIGEST2,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2021-12-01,GS,383.82,2476000,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2021-12-02,IBM,117.37,5267100,DIGEST4,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2021-12-02,JPMX,159.83,12969900,DIGEST5,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2021-12-02,GS,37800.00,3343700,DIGEST6,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2021-12-02,JPM,159.83,12969900,DIGEST7,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -2021-12-02,GS,378.00,3343700,DIGEST8,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPMX,159.83,12969900,DIGEST5,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2021-12-02,GS,37800.00,3343700,DIGEST6,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2021-12-02,JPM,159.83,12969900,DIGEST7,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +2021-12-02,GS,378.00,3343700,DIGEST8,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv index e7232a18a95..2fe02593d27 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv @@ -2,7 +2,7 @@ 2021-12-01,JPM,161.00,12253400,DIGEST2,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2021-12-01,GS,383.82,2476000,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2021-12-02,IBM,117.37,5267100,DIGEST4,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -2021-12-02,JPMX,159.83,12969900,DIGEST5,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2021-12-02,GS,37800.00,3343700,DIGEST6,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2021-12-02,JPM,159.83,12969900,DIGEST7,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -2021-12-02,GS,378.00,3343700,DIGEST8,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPMX,159.83,12969900,DIGEST5,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2021-12-02,GS,37800.00,3343700,DIGEST6,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +2021-12-02,JPM,159.83,12969900,DIGEST7,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +2021-12-02,GS,378.00,3343700,DIGEST8,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass2.csv index 32412eb20f4..f0e990dd0eb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass2.csv @@ -1,5 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST3_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST3_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv index 32412eb20f4..f0e990dd0eb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv @@ -1,5 +1,5 @@ 1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 -3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST3_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 -4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,2000-01-02 00:00:00.123456 +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST3_UPDATED,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.123456,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/max_version/staging_data_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/max_version/staging_data_pass1.csv new file mode 100644 index 00000000000..aac15867141 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/max_version/staging_data_pass1.csv @@ -0,0 +1,9 @@ +2021-12-01,IBM,116.92,5958300,DIGEST1,1 +2021-12-01,JPM,161.00,12253400,DIGEST2,1 +2021-12-01,GS,383.82,2476000,DIGEST3,1 +2021-12-01,GS,383.82,2476001,DIGEST3_UPDATED1,2 +2021-12-01,GS,383.82,2476002,DIGEST3_UPDATED2,3 +2021-12-02,IBM,117.37,5267100,DIGEST4,1 +2021-12-02,JPMX,159.83,12969900,DIGEST5,1 +2021-12-02,JPMX,159.83,12969901,DIGEST5_UPDATED,2 +2021-12-02,GS,37800.00,3343700,DIGEST6,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/max_version/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/max_version/staging_data_pass2.csv new file mode 100644 index 00000000000..4110163a165 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/max_version/staging_data_pass2.csv @@ -0,0 +1,4 @@ +2021-12-02,IBM,117.37,5267100,DIGEST4,1 +2021-12-02,JPM,159.83,12969900,DIGEST7,1 
+2021-12-02,GS,378.00,3343700,DIGEST8,2 +2021-12-02,GS,378.00,3343700,DIGEST8,2 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/no_version/staging_data_pass1.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/staging_data_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/no_version/staging_data_pass1.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/no_version/staging_data_pass2.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/staging_data_pass2.csv rename to 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/no_version/staging_data_pass2.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_partition/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/max_version/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/max_version/staging_data_pass1.csv new file mode 100644 index 00000000000..0f4408df251 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/max_version/staging_data_pass1.csv @@ -0,0 +1,6 @@ 
+1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,DIGEST3,2 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST4,1 +3,ANDY,3001,2020-01-03 00:00:00.0,2022-12-03,DIGEST5,2 +3,ANDY,3002,2020-01-03 00:00:00.0,2022-12-03,DIGEST6,3 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/max_version/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/max_version/staging_data_pass2.csv new file mode 100644 index 00000000000..6cd3b417fe5 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/max_version/staging_data_pass2.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST7,4 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST8,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/no_version/staging_data_pass1.csv similarity index 
100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/staging_data_pass1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/no_version/staging_data_pass1.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/no_version/staging_data_pass2.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/staging_data_pass2.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/no_version/staging_data_pass2.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/staging_data_pass3.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/without_partition/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/with_partition/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/with_partition/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/with_partition/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/with_partition_filter/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/with_partition_filter/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/with_partition_filter/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/without_partition/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/without_partition/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_based/without_partition/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/time_based/with_partition/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/time_based/with_partition/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/time_based/with_partition/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/time_based/without_partition/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/time_based/without_partition/staging_data_pass3.csv deleted file mode 100644 index 8b137891791..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/time_based/without_partition/staging_data_pass3.csv +++ /dev/null @@ -1 +0,0 @@ - diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index c79587954e9..20c714fa2a7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -14,7 +14,6 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; import org.finos.legend.engine.persistence.components.common.StatisticName; import 
org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; @@ -41,7 +40,7 @@ public RelationalSink getRelationalSink() } @Override - public void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations) + public void verifyAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecordsDeriveMainSchema(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -53,45 +52,36 @@ public void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyAllowDuplicatesWithAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00' " + - "FROM `mydb`.`staging` as stage)"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableCreateQueryWithAuditAndNoPKs, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + 
Assertions.assertNull(operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); } @Override - public void verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(List generatorResults, List dataSplitRanges) + public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords(List generatorResults, List dataSplitRanges) { String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00' " + - "FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit, generatorResults.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, generatorResults.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates, generatorResults.get(0).deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), generatorResults.get(0).ingestSql().get(0)); 
Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), generatorResults.get(1).ingestSql().get(0)); Assertions.assertEquals(2, generatorResults.size()); // Stats - String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -102,72 +92,23 @@ public void verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage)"; - - Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestCreateQuery, 
preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyFailOnDuplicatesWithAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00' " + - "FROM `mydb`.`staging` as stage)"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditNotPKCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyAppendOnlyFilterDuplicatesNoAuditing(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + - "(sink.`digest` = stage.`digest`))))"; - - Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); - - // Stats - Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); - Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); - Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); - Assertions.assertEquals(rowsDeleted, 
operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); - } - - @Override - public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries) + public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRecords(GeneratorResult queries) { List preActionsSqlList = queries.preActionsSql(); List milestoningSqlList = queries.ingestSql(); + List deduplicationAndVersioningSql = queries.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00' FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + - "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000' FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); List postActionsSql = queries.postActionsSql(); @@ -176,6 +117,7 @@ public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries 
assertIfListsAreSameIgnoringOrder(expectedSQL, postActionsSql); // Stats + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; Assertions.assertEquals(incomingRecordCount, queries.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, queries.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); @@ -185,26 +127,30 @@ public void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries } @Override - public void verifyAppendOnlyFilterDuplicatesWithAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExistingRecords(List operations, List dataSplitRanges) { String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00' " + - "FROM `mydb`.`staging` as stage " + - "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + - "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + - "(sink.`digest` = stage.`digest`)))))"; + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + + "(NOT (EXISTS (SELECT * FROM 
`mydb`.`main` as sink " + + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + + "(sink.`digest` = stage.`digest`)))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit, operations.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, operations.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates, operations.get(0).deduplicationAndVersioningSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); Assertions.assertEquals(2, operations.size()); // Stats - String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage " + - "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), 
operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -221,14 +167,13 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); - String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`) " + - "(SELECT * FROM `MYDB`.`STAGING` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink " + - "WHERE ((sink.`ID` = stage.`ID`) " + - "AND (sink.`NAME` = stage.`NAME`)) " + - "AND (sink.`DIGEST` = stage.`DIGEST`))))"; + String insertSql = "INSERT INTO `MYDB`.`MAIN` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_UPDATE_TIME`) " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,'2000-01-01 00:00:00.000000' FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "WHERE NOT (EXISTS " + + "(SELECT * FROM `MYDB`.`MAIN` as sink WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestCreateQueryWithUpperCase, preActionsSqlList.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQueryUpperCase, preActionsSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); } @@ -238,23 +183,71 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); - String insertSql = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + - "(sink.`digest` = stage.`digest`))))"; + String insertSql = "INSERT INTO 
`mydb`.`main` (`id`, `name`, `amount`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,'2000-01-01 00:00:00.000000' FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); } - private void verifyStats(GeneratorResult operations) + @Override + public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecords(GeneratorResult operations) { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + + String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000' FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + 
Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; + String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + } + + @Override + public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + + String insertSql = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000' FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); + + 
Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; + String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java index b47384fde55..38fd10f2401 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java @@ -71,24 +71,24 @@ public void verifyBitemporalDeltaBatchIdDateTimeBasedNoDeleteIndWithDataSplits(L { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + "((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`validity_from_target` = stage.`validity_from_reference`) AND (sink.`digest` <> stage.`digest`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `validity_from_target`, " + - "`validity_through_target`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + + "`validity_through_target`, `digest`, `version`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`validity_through_reference`," + - "stage.`digest`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM 
batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "stage.`digest`,stage.`version`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE " + "(sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`validity_from_target` = stage.`validity_from_reference`)))) AND " + "((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')))"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalMainTableWithBatchIdDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalMainTableWithVersionWithBatchIdDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedMilestoneQuery, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); @@ -153,7 +153,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((stage.`data_split` >= 
'{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND " + @@ -162,10 +162,10 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `validity_from_target`, `validity_through_target`, `digest`, " + + "(`id`, `name`, `amount`, `validity_from_target`, `validity_through_target`, `digest`, `version`, " + "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`," + - "stage.`validity_through_reference`,stage.`digest`,'2000-01-01 00:00:00'," + + "stage.`validity_through_reference`,stage.`digest`,stage.`version`,'2000-01-01 00:00:00.000000'," + "'9999-12-31 23:59:59' FROM `mydb`.`staging` as stage WHERE " + "((NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + "AND (sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + @@ -173,7 +173,7 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) AND " + "(stage.`delete_indicator` NOT IN ('yes','1','true')))"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalMainTableWithDatetimeCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalMainTableWithVersionBatchDateTimeCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedMilestoneQuery, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); @@ -185,10 +185,10 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { 
String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`data_split` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`data_split` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -132,10 +134,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`delete_indicator`,stage.`data_split` FROM `mydb`.`staging` as stage WHERE (stage.`delete_indicator` NOT IN ('yes','1','true')) AND ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -354,10 +360,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + "WHERE legend_persistence_x.`delete_indicator` = 0 " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, 
legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_y.`validity_through_target` > legend_persistence_x.`legend_persistence_start_date`) AND (legend_persistence_y.`validity_through_target` <= legend_persistence_x.`legend_persistence_end_date`) AND (legend_persistence_y.`delete_indicator` <> 0) " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery, operations.get(0).preActionsSql().get(3)); @@ -446,6 +452,9 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedStageToStageWithoutDuplicates = "INSERT INTO 
`mydb`.`stagingWithoutDuplicates` " + - "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `data_split`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`data_split` FROM `mydb`.`staging` as stage " + + "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `version`, `data_split`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`data_split` FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`digest` = stage.`digest`) AND (sink.`batch_id_out` = 999999999))))"; String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`data_split` FROM 
`mydb`.`stagingWithoutDuplicates` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`data_split` FROM `mydb`.`stagingWithoutDuplicates` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -566,10 +575,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_x.`validity_from_reference` = legend_persistence_y.`legend_persistence_start_date`))"; String expectedMainToTemp = "INSERT INTO `mydb`.`temp` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999) as legend_persistence_x " + + "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`version`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999) as legend_persistence_x " + "INNER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,legend_persistence_x.`legend_persistence_end_date` as `legend_persistence_end_date` " + "FROM " + @@ -594,13 +603,13 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl "AND (sink.`batch_id_out` = 999999999)"; String expectedTempToMain = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + - "(SELECT temp.`id`,temp.`name`,temp.`amount`,temp.`digest`,temp.`batch_id_in`,temp.`batch_id_out`,temp.`validity_from_target`,temp.`validity_through_target` FROM `mydb`.`temp` as temp)"; + "(`id`, `name`, `amount`, `digest`, `version`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + + "(SELECT 
temp.`id`,temp.`name`,temp.`amount`,temp.`digest`,temp.`version`,temp.`batch_id_in`,temp.`batch_id_out`,temp.`validity_from_target`,temp.`validity_through_target` FROM `mydb`.`temp` as temp)"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); - Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyStageWithDataSplitWithoutDuplicatesTableCreateQuery, operations.get(0).preActionsSql().get(3)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyTempTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(2)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyStageWithVersionWithDataSplitWithoutDuplicatesTableCreateQuery, operations.get(0).preActionsSql().get(3)); Assertions.assertEquals(expectedStageToStageWithoutDuplicates, operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(expectedStageToTemp, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(1)); @@ -620,6 +629,9 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(0).metadataIngestSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`temp`"), operations.get(0).postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`stagingWithoutDuplicates`"), operations.get(0).postCleanupSql().get(1)); + Assertions.assertEquals(2, operations.size()); String incomingRecordCount = 
"SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; verifyStats(operations.get(0), enrichSqlWithDataSplits(incomingRecordCount,dataSplitRanges.get(0)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); @@ -763,6 +775,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`batch_id_in` INTEGER NOT NULL," + "`batch_id_out` INTEGER," + "`validity_from_target` DATETIME NOT NULL," + @@ -774,6 +787,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`batch_id_in` INTEGER NOT NULL," + "`batch_id_out` INTEGER," + "`validity_from_target` DATETIME NOT NULL," + @@ -787,20 +801,21 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`amount` DOUBLE," + "`validity_from_reference` DATETIME NOT NULL," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`delete_indicator` VARCHAR(256)," + "`data_split` BIGINT NOT NULL," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`, `data_split`))"; String expectedStageToStageWithoutDuplicates = "INSERT INTO " + stageWithoutDuplicatesName + " " + - "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `delete_indicator`, `data_split`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`delete_indicator`,stage.`data_split` FROM `mydb`.`staging` as stage " + + "(`id`, `name`, `amount`, `validity_from_reference`, `digest`, `version`, `delete_indicator`, `data_split`) " + + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest`,stage.`version`,stage.`delete_indicator`,stage.`data_split` FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`digest` = stage.`digest`) AND (sink.`batch_id_out` = 999999999))))"; String expectedStageToTemp = "INSERT INTO " + tempName + " " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT legend_persistence_stageWithoutDuplicates.`id`,legend_persistence_stageWithoutDuplicates.`name`,legend_persistence_stageWithoutDuplicates.`amount`,legend_persistence_stageWithoutDuplicates.`validity_from_reference`,legend_persistence_stageWithoutDuplicates.`digest`,legend_persistence_stageWithoutDuplicates.`delete_indicator`,legend_persistence_stageWithoutDuplicates.`data_split` FROM " + stageWithoutDuplicatesName + " as legend_persistence_stageWithoutDuplicates 
WHERE (legend_persistence_stageWithoutDuplicates.`delete_indicator` NOT IN ('yes','1','true')) AND ((legend_persistence_stageWithoutDuplicates.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (legend_persistence_stageWithoutDuplicates.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + + "(SELECT legend_persistence_stageWithoutDuplicates.`id`,legend_persistence_stageWithoutDuplicates.`name`,legend_persistence_stageWithoutDuplicates.`amount`,legend_persistence_stageWithoutDuplicates.`validity_from_reference`,legend_persistence_stageWithoutDuplicates.`digest`,legend_persistence_stageWithoutDuplicates.`version`,legend_persistence_stageWithoutDuplicates.`delete_indicator`,legend_persistence_stageWithoutDuplicates.`data_split` FROM " + stageWithoutDuplicatesName + " as legend_persistence_stageWithoutDuplicates WHERE (legend_persistence_stageWithoutDuplicates.`delete_indicator` NOT IN ('yes','1','true')) AND ((legend_persistence_stageWithoutDuplicates.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (legend_persistence_stageWithoutDuplicates.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + @@ -818,10 +833,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_x.`validity_from_reference` = legend_persistence_y.`legend_persistence_start_date`))"; String expectedMainToTemp = "INSERT INTO " + tempName + " " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, 
`batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + "FROM " + - "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999) as legend_persistence_x " + + "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`version`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999) as legend_persistence_x " + "INNER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,legend_persistence_x.`legend_persistence_end_date` as `legend_persistence_end_date` " + "FROM " + @@ -846,12 +861,12 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "AND (sink.`batch_id_out` = 999999999)"; String expectedTempToMain = "INSERT INTO `mydb`.`main` " + 
- "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + - "(SELECT legend_persistence_temp.`id`,legend_persistence_temp.`name`,legend_persistence_temp.`amount`,legend_persistence_temp.`digest`,legend_persistence_temp.`batch_id_in`,legend_persistence_temp.`batch_id_out`,legend_persistence_temp.`validity_from_target`,legend_persistence_temp.`validity_through_target` FROM " + tempName + " as legend_persistence_temp)"; + "(`id`, `name`, `amount`, `digest`, `version`, `batch_id_in`, `batch_id_out`, `validity_from_target`, `validity_through_target`) " + + "(SELECT legend_persistence_temp.`id`,legend_persistence_temp.`name`,legend_persistence_temp.`amount`,legend_persistence_temp.`digest`,legend_persistence_temp.`version`,legend_persistence_temp.`batch_id_in`,legend_persistence_temp.`batch_id_out`,legend_persistence_temp.`validity_from_target`,legend_persistence_temp.`validity_through_target` FROM " + tempName + " as legend_persistence_temp)"; String expectedMainToTempForDeletion = "INSERT INTO " + tempWithDeleteIndicatorName + " " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `delete_indicator`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_x.`validity_through_target` as `legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,(CASE WHEN legend_persistence_y.`delete_indicator` IS NULL THEN 0 ELSE 1 END) " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `delete_indicator`) " + + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_x.`validity_through_target` as `legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,(CASE WHEN legend_persistence_y.`delete_indicator` IS NULL THEN 0 ELSE 1 END) " + "FROM " + "(SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " + "AND (EXISTS " + @@ -871,19 +886,19 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "AND (sink.`batch_id_out` = 999999999)"; String expectedTempToMainForDeletion = "INSERT INTO `mydb`.`main` " + - "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`legend_persistence_start_date` as `legend_persistence_start_date`,MAX(legend_persistence_y.`validity_through_target`) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),'9999-12-31 23:59:59') as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`legend_persistence_start_date` as `legend_persistence_start_date`,MAX(legend_persistence_y.`validity_through_target`) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` FROM " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),'9999-12-31 23:59:59') as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + "FROM " + tempWithDeleteIndicatorName + " as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_y.`validity_from_target` > legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + "WHERE legend_persistence_x.`delete_indicator` = 0 " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`validity_from_target`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`) as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON 
((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_y.`validity_through_target` > legend_persistence_x.`legend_persistence_start_date`) AND (legend_persistence_y.`validity_through_target` <= legend_persistence_x.`legend_persistence_end_date`) AND (legend_persistence_y.`delete_indicator` <> 0) " + - "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`amount`, legend_persistence_x.`version`, legend_persistence_x.`digest`, legend_persistence_x.`legend_persistence_start_date`, legend_persistence_x.`batch_id_in`, legend_persistence_x.`batch_id_out`)"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyMainTableCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBitemporalFromOnlyMainTableWithVersionCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, operations.get(0).preActionsSql().get(1)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableCreateQuery, operations.get(0).preActionsSql().get(2)); Assertions.assertEquals(expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery, operations.get(0).preActionsSql().get(3)); @@ -915,6 +930,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(0).metadataIngestSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`main_legend_persistence_temp`"), operations.get(0).postCleanupSql().get(0)); + 
Assertions.assertEquals(getDropTempTableQuery("`mydb`.`main_legend_persistence_tempWithDeleteIndicator`"), operations.get(0).postCleanupSql().get(1)); + Assertions.assertEquals(getDropTempTableQuery("`mydb`.`staging_legend_persistence_stageWithoutDuplicates`"), operations.get(0).postCleanupSql().get(2)); + Assertions.assertEquals(2, operations.size()); String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`) AND (sink2.`validity_from_target` = sink.`validity_from_target`)) AND (sink2.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))))"; @@ -1005,7 +1024,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`," + - "legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 
23:59:59' " + + "legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1027,7 +1046,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedMainToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM " + "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`batch_time_in`," + "sink.`batch_time_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink " + @@ -1051,7 +1070,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedUpdateMain = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as 
batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (EXISTS " + "(SELECT * FROM `mydb`.`temp` as temp WHERE ((sink.`id` = temp.`id`) AND (sink.`name` = temp.`name`)) " + "AND (sink.`validity_from_target` = temp.`validity_from_target`))) AND (sink.`batch_id_out` = 999999999)"; @@ -1086,7 +1105,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`," + - "legend_persistence_y.`legend_persistence_end_date`,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "legend_persistence_y.`legend_persistence_end_date`,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1109,7 +1128,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`," + "legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM (SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_time_in`," + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM 
(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_time_in`," + "sink.`batch_time_out`,sink.`validity_from_target`,sink.`validity_through_target` " + "FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '9999-12-31 23:59:59') as legend_persistence_x " + "INNER JOIN " + @@ -1132,7 +1151,7 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "AND (legend_persistence_x.`validity_from_target` = legend_persistence_y.`legend_persistence_start_date`))"; String expectedUpdateMain = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (EXISTS (SELECT * FROM `mydb`.`temp` as temp WHERE " + "((sink.`id` = temp.`id`) AND (sink.`name` = temp.`name`)) AND " + "(sink.`validity_from_target` = temp.`validity_from_target`))) AND (sink.`batch_time_out` = '9999-12-31 23:59:59')"; @@ -1154,8 +1173,8 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00'"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00') as `rowsInserted`"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000'"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000') as 
`rowsInserted`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java index bd8903a0df6..4fcb9f97f5a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java @@ -54,7 +54,7 @@ public class IngestModeTest String[] partitionKeys = new String[]{"biz_date"}; HashMap> partitionFilter = new HashMap>() {{ - put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00", "2000-01-02 00:00:00"))); + put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00.000000", "2000-01-02 00:00:00"))); }}; // Base Columns: Primary keys : id, name @@ -167,9 +167,9 @@ public class IngestModeTest "`BATCH_STATUS` VARCHAR(32)," + "`TABLE_BATCH_ID` INTEGER)"; - protected String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + protected String expectedMetadataTableIngestQuery = 
"INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; - protected String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.`TABLE_NAME` = 'main'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + protected String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.`TABLE_NAME` = 'main'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java index 81bf04a1f2e..12cb43e9b10 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java @@ -62,6 +62,30 @@ public class MemsqlTestArtifacts "`version` INTEGER," + "PRIMARY KEY (`id`, `name`))"; + public static String expectedBaseTempStagingTableWithCount = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`biz_date` DATE," + + "`legend_persistence_count` INTEGER)"; + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`biz_date` DATE," + + "`digest` VARCHAR(256)," + + "`legend_persistence_count` INTEGER)"; + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`biz_date` DATE," + + "`digest` VARCHAR(256)," + + "`legend_persistence_count` INTEGER," + + "`data_split` INTEGER NOT NULL)"; + public static String expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + "`ID` INTEGER NOT NULL," + "`NAME` VARCHAR(256) NOT NULL," + @@ -113,6 +137,15 @@ public class MemsqlTestArtifacts "`batch_update_time` DATETIME NOT NULL," + "PRIMARY KEY (`id`, `name`, `batch_update_time`))"; + public static String expectedBaseTablePlusDigestPlusUpdateTimestampCreateQueryUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + + "`ID` INTEGER NOT NULL," + + "`NAME` VARCHAR(256) NOT NULL," + + "`AMOUNT` DOUBLE," + + "`BIZ_DATE` DATE," + + 
"`DIGEST` VARCHAR(256)," + + "`BATCH_UPDATE_TIME` DATETIME NOT NULL," + + "PRIMARY KEY (`ID`, `NAME`, `BATCH_UPDATE_TIME`))"; + public static String expectedBaseTableWithAuditNotPKCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + @@ -131,6 +164,7 @@ public class MemsqlTestArtifacts "PRIMARY KEY (`id`, `name`, `batch_update_time`))"; public static String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage"; + public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_legend_persistence_temp_staging` as stage"; public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging` CASCADE"; @@ -162,10 +196,10 @@ public class MemsqlTestArtifacts "`BATCH_ID_IN` INTEGER NOT NULL,`BATCH_ID_OUT` INTEGER,PRIMARY KEY (`ID`, `NAME`, `BATCH_ID_IN`))"; public static String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`)" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; public static String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`)" + - " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE')"; + " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 
FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; public static String expectedMetadataTableIngestQueryWithPlaceHolders = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) " + "(SELECT 'main',{BATCH_ID_PATTERN},'{BATCH_START_TS_PATTERN}','{BATCH_END_TS_PATTERN}','DONE')"; @@ -222,11 +256,12 @@ public class MemsqlTestArtifacts "`digest` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`))"; - public static String expectedBitemporalMainTableWithBatchIdDatetimeCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalMainTableWithVersionWithBatchIdDatetimeCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`batch_id_in` INTEGER NOT NULL," + "`batch_id_out` INTEGER," + "`batch_time_in` DATETIME," + @@ -235,11 +270,12 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalMainTableWithDatetimeCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalMainTableWithVersionBatchDateTimeCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`batch_time_in` DATETIME NOT NULL," + "`batch_time_out` DATETIME," + "`validity_from_target` DATETIME NOT NULL," + @@ -257,6 +293,18 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; + public static String 
expectedBitemporalFromOnlyMainTableWithVersionCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`digest` VARCHAR(256)," + + "`version` INTEGER," + + "`batch_id_in` INTEGER NOT NULL," + + "`batch_id_out` INTEGER," + + "`validity_from_target` DATETIME NOT NULL," + + "`validity_through_target` DATETIME," + + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; + public static String expectedBitemporalFromOnlyStagingTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + @@ -310,6 +358,18 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; + public static String expectedBitemporalFromOnlyTempTableWithVersionCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`temp`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`digest` VARCHAR(256)," + + "`version` INTEGER," + + "`batch_id_in` INTEGER NOT NULL," + + "`batch_id_out` INTEGER," + + "`validity_from_target` DATETIME NOT NULL," + + "`validity_through_target` DATETIME," + + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; + public static String expectedBitemporalFromOnlyTempTableBatchIdAndTimeBasedCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`temp`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + @@ -354,12 +414,13 @@ public class MemsqlTestArtifacts "`delete_indicator` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyStageWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + + public static String 
expectedBitemporalFromOnlyStageWithVersionWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`validity_from_reference` DATETIME NOT NULL," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`data_split` BIGINT NOT NULL," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`, `data_split`))"; @@ -372,4 +433,60 @@ public class MemsqlTestArtifacts "`delete_indicator` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`))"; + public static String expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`," + + "DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,COUNT(*) as `legend_persistence_count` FROM " + + "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage) " + + "as stage WHERE stage.`legend_persistence_rank` = 1)"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, 
stage.`digest`)"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + + "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM " + + "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " + + "WHERE stage.`legend_persistence_rank` = 1)"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, `data_split`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; + + public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage"; + + public static 
String dataErrorCheckSql = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage"; + public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `LEGEND_PERSISTENCE_COUNT`) " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT` " + + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + + "stage.`LEGEND_PERSISTENCE_COUNT` as `LEGEND_PERSISTENCE_COUNT`," + + "DENSE_RANK() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`BIZ_DATE` DESC) as `LEGEND_PERSISTENCE_RANK` " + + "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,COUNT(*) as `LEGEND_PERSISTENCE_COUNT` " + + "FROM `MYDB`.`STAGING` as stage GROUP BY stage.`ID`, stage.`NAME`, stage.`AMOUNT`, stage.`BIZ_DATE`, stage.`DIGEST`) as 
stage) as stage WHERE stage.`LEGEND_PERSISTENCE_RANK` = 1)"; + public static String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + + "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index a608045e934..e5daeed87f7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -30,11 +30,18 @@ public class NontemporalDeltaTest extends NontemporalDeltaTestCases protected String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; protected String incomingRecordCountWithSplits = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE " + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + + protected String incomingRecordCountWithSplitsAndDuplicates = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` " + + "FROM 
`mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + + protected String incomingRecordCountWithSplitsTempTable = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + + "(stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; protected String rowsTerminated = "SELECT 0 as `rowsTerminated`"; protected String rowsDeleted = "SELECT 0 as `rowsDeleted`"; @Override - public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -50,7 +57,7 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; @@ -66,25 +73,25 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio } @Override - public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE `mydb`.`main` as sink " + - "INNER JOIN `mydb`.`staging` as stage " + + "INNER JOIN `mydb`.`staging_legend_persistence_temp_staging` as 
stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) " + "SET sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + "sink.`amount` = stage.`amount`," + "sink.`biz_date` = stage.`biz_date`," + "sink.`digest` = stage.`digest`," + - "sink.`batch_update_time` = '2000-01-01 00:00:00'"; + "sink.`batch_update_time` = '2000-01-01 00:00:00.000000'"; String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00' " + - "FROM `mydb`.`staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; @@ -99,7 +106,39 @@ public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operat } @Override - public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) + { + String updateSql = "UPDATE `mydb`.`main` as sink " + + "INNER JOIN " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) " + + "SET sink.`id` = stage.`id`," + + "sink.`name` = stage.`name`," + + "sink.`amount` = stage.`amount`," + + "sink.`biz_date` = stage.`biz_date`," + + "sink.`digest` = stage.`digest`"; + + String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`) " 
+ + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + + "AND (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(updateSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(1)); + + Assertions.assertEquals(enrichSqlWithDataSplits(updateSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(1)); + + // Stats + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempTable, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempTable, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + + @Override + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List operations, List dataSplitRanges) { String updateSql = "UPDATE `mydb`.`main` as sink " + "INNER JOIN " + @@ -122,7 +161,7 @@ public void 
verifyNonTemporalDeltaNoAuditingWithDataSplit(List Assertions.assertEquals(enrichSqlWithDataSplits(updateSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(1)); - + // Stats Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); @@ -131,21 +170,21 @@ public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List } @Override - public void verifyNonTemporalDeltaWithWithAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String updateSql = "UPDATE `mydb`.`main` as sink " + "INNER JOIN " + - "(SELECT * FROM `mydb`.`staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) SET " + "sink.`id` = stage.`id`," + "sink.`name` = stage.`name`," + "sink.`amount` = stage.`amount`," + "sink.`biz_date` = stage.`biz_date`," + "sink.`digest` = stage.`digest`," + - "sink.`batch_update_time` = '2000-01-01 00:00:00'"; + "sink.`batch_update_time` = '2000-01-01 00:00:00.000000'"; String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, 
`batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00' FROM `mydb`.`staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,'2000-01-01 00:00:00.000000' FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "AND (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -156,14 +195,14 @@ public void verifyNonTemporalDeltaWithWithAuditingWithDataSplit(List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -184,8 +223,8 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator(Gener "WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; String deleteSql = "DELETE FROM `mydb`.`main` as sink " + - "WHERE EXISTS (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` " + - "FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + + "WHERE EXISTS (" + + "SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND (sink.`digest` = stage.`digest`) AND (stage.`delete_indicator` IN ('yes','1','true')))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); @@ -215,7 +254,7 @@ public void verifyNontemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati "sink.`DIGEST` = stage.`DIGEST`"; String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`) " + - "(SELECT * FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST` FROM 
`MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink " + "WHERE (sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`))))"; @@ -239,7 +278,7 @@ public void verifyNontemporalDeltaWithLessColumnsInStaging(GeneratorResult opera "sink.`digest` = stage.`digest`"; String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest` FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; @@ -272,7 +311,7 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + - "(SELECT * FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS " + "(SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)))) AND ((stage.`biz_date` > '2020-01-01') AND (stage.`biz_date` < '2020-01-03')))"; @@ -288,23 +327,19 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult } @Override - public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(GeneratorResult operations) + public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE `mydb`.`main` as sink " + "INNER JOIN " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`,ROW_NUMBER() OVER (PARTITION BY 
stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_row_num` FROM `mydb`.`staging` as stage WHERE stage.`snapshot_id` > 18972) as stage WHERE stage.`legend_persistence_row_num` = 1) as stage " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage " + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`) " + "SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`version` = stage.`version`"; String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `version`) " + - "(SELECT * FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`,ROW_NUMBER() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_row_num` FROM `mydb`.`staging` as stage " + - "WHERE stage.`snapshot_id` > 18972) as stage " + - "WHERE stage.`legend_persistence_row_num` = 1) as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQuery, preActionsSqlList.get(0)); @@ -319,7 +354,7 @@ public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(Ge } @Override - public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List 
milestoningSqlList = operations.ingestSql(); @@ -332,7 +367,7 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(Gene String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`) " + - "(SELECT * FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS " + "(SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))) " + "AND (stage.`snapshot_id` > 18972))"; @@ -348,7 +383,7 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(Gene } @Override - public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaNoDedupMaxVersionWithoutPerform(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -359,7 +394,7 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( "SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`version` = stage.`version`"; String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `version`) " + - "(SELECT * FROM `mydb`.`staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQuery, preActionsSqlList.get(0)); @@ -373,22 +408,19 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( } @Override - public 
void verifyNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String updateSql = "UPDATE `MYDB`.`MAIN` as sink " + "INNER JOIN " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` FROM " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`,ROW_NUMBER() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`VERSION` DESC) as `LEGEND_PERSISTENCE_ROW_NUM` FROM `MYDB`.`STAGING` as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_ROW_NUM` = 1) as stage " + + "`MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + "ON ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`) " + "SET sink.`ID` = stage.`ID`,sink.`NAME` = stage.`NAME`,sink.`AMOUNT` = stage.`AMOUNT`,sink.`BIZ_DATE` = stage.`BIZ_DATE`,sink.`DIGEST` = stage.`DIGEST`,sink.`VERSION` = stage.`VERSION`"; String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`) " + - "(SELECT * FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` FROM " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`,ROW_NUMBER() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`VERSION` DESC) as `LEGEND_PERSISTENCE_ROW_NUM` FROM `MYDB`.`STAGING` as stage) as stage " + - "WHERE stage.`LEGEND_PERSISTENCE_ROW_NUM` = 1) as stage " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`ID` = stage.`ID`) AND 
(sink.`NAME` = stage.`NAME`))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase, preActionsSqlList.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java index 0441c583f16..9b5db338c56 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; @@ -24,23 +25,25 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; + +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.maxDupsErrorCheckSql; public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { String rowsDeleted = "SELECT COUNT(*) as `rowsDeleted` FROM `mydb`.`main` as sink"; - String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM 
`mydb`.`staging` as stage"; String rowsUpdated = "SELECT 0 as `rowsUpdated`"; String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink"; String rowsTerminated = "SELECT 0 as `rowsTerminated`"; @Override - public void verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotNoAuditingNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`) " + - "(SELECT * FROM `mydb`.`staging` as stage)"; + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date` FROM `mydb`.`staging` as stage)"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedStagingTableCreateQuery, preActionsSqlList.get(1)); @@ -48,66 +51,58 @@ public void verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult opera Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override - public void verifyNontemporalSnapshotNoAuditingWithDataSplit(GeneratorResult operations) - { - List preActionsSqlList = operations.preActionsSql(); - List milestoningSqlList = operations.ingestSql(); - - String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date` FROM `mydb`.`staging` as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`staging` as stage_right WHERE " + - "(stage.`data_split` < stage_right.`data_split`) AND ((stage.`id` = stage_right.`id`) AND (stage.`name` = stage_right.`name`)))))"; - - Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); - Assertions.assertEquals(MemsqlTestArtifacts.cleanUpMainTableSql, 
milestoningSqlList.get(0)); - Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); - - // Stats - verifyStats(operations); - } - - @Override - public void verifyNontemporalSnapshotWithAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00' " + - "FROM `mydb`.`staging` as stage)"; + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00.000000' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override - public void verifyNontemporalSnapshotWithAuditingWithDataSplit(GeneratorResult operations) + public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); - String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00' " + - "FROM `mydb`.`staging` as stage WHERE NOT (EXISTS " + - "(SELECT * FROM `mydb`.`staging` as 
stage_right " + - "WHERE (stage.`data_split` < stage_right.`data_split`) AND ((stage.`id` = stage_right.`id`) AND " + - "(stage.`name` = stage_right.`name`)))))"; + String insertSql = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,'2000-01-01 00:00:00.000000' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + + String maxDataErrorCheckSql = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + + "(SELECT COUNT(DISTINCT(`amount`)) as `legend_persistence_distinct_rows` " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithCount, preActionsSqlList.get(1)); Assertions.assertEquals(MemsqlTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(maxDataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + // Stats - verifyStats(operations); + verifyStats(operations, "staging"); } @Override @@ -117,7 +112,7 @@ public void verifyNontemporalSnapshotWithUpperCaseOptimizer(GeneratorResult quer List milestoningSqlList = queries.ingestSql(); String insertSql = "INSERT INTO `MYDB`.`MAIN` (`ID`, 
`NAME`, `AMOUNT`, `BIZ_DATE`) " + - "(SELECT * FROM `MYDB`.`STAGING` as stage)"; + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE` FROM `MYDB`.`STAGING` as stage)"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableCreateQueryWithUpperCase, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.cleanupMainTableSqlUpperCase, milestoningSqlList.get(0)); @@ -131,7 +126,7 @@ public void verifyNontemporalSnapshotWithLessColumnsInStaging(GeneratorResult op List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`) " + - "(SELECT * FROM `mydb`.`staging` as stage)"; + "(SELECT stage.`id`,stage.`name`,stage.`amount` FROM `mydb`.`staging` as stage)"; Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.cleanUpMainTableSql, milestoningSqlList.get(0)); @@ -148,9 +143,9 @@ public void verifyNontemporalSnapshotWithCleanStagingData(GeneratorResult operat } @Override - public void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostActions) + public void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostCleanup) { - List sqlsForPostActions = physicalPlanForPostActions.getSqlList(); + List sqlsForPostActions = physicalPlanForPostCleanup.getSqlList(); List expectedSQL = new ArrayList<>(); expectedSQL.add(MemsqlTestArtifacts.expectedDropTableQuery); assertIfListsAreSameIgnoringOrder(expectedSQL, sqlsForPostActions); @@ -162,12 +157,13 @@ public RelationalSink getRelationalSink() return MemSqlSink.get(); } - private void verifyStats(GeneratorResult operations) + private void verifyStats(GeneratorResult operations, String stageTableName) { // Pre stats: Assertions.assertEquals(rowsDeleted, operations.preIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); // Post Stats: + String incomingRecordCount = String.format("SELECT COUNT(*) as 
`incomingRecordCount` FROM `mydb`.`%s` as stage", stageTableName); Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 0737b1bb640..5719e415d13 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -27,7 +27,7 @@ public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -68,7 +68,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List 
operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupAllVersionsWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + @@ -109,7 +109,7 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -119,7 +119,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -127,7 +127,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999 FROM `mydb`.`staging` as stage " + + "999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -148,13 +148,13 @@ public 
void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -162,7 +162,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ -180,7 +180,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -258,7 +258,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFilters(G } @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFiltersIncludesNullValues(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndWithOptimizationFiltersIncludesNullValues(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = 
operations.ingestSql(); @@ -318,35 +318,24 @@ public void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult } @Override - public void verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(GeneratorResult operations) + public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 " + - "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + - "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS " + - "(SELECT * FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` " + - "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`,ROW_NUMBER() " + - "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_row_num` " + - "FROM `mydb`.`staging` as stage WHERE stage.`batch_id_in` > 5) as stage " + - "WHERE stage.`legend_persistence_row_num` = 1) as stage " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 WHERE (sink.`batch_id_out` = 999999999) AND " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))"; - String expectedUpsertQuery = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, " + - "`digest`, `version`, `batch_id_in`, `batch_id_out`) " + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, 
`biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 " + - "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + - "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version` " + - "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + - "ROW_NUMBER() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) " + - "as `legend_persistence_row_num` FROM `mydb`.`staging` as stage WHERE stage.`batch_id_in` > 5) as stage " + - "WHERE stage.`legend_persistence_row_num` = 1) as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE (sink.`batch_id_out` = 999999999) AND (stage.`version` <= sink.`version`) " + - "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE " + + "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + + "(stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); @@ -357,7 +346,7 @@ public void verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(Gen } @Override - public void verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(GeneratorResult operations) + public void 
verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -394,7 +383,7 @@ public void verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(Generato } @Override - public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations) + public void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -402,7 +391,7 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(stage.`version` > sink.`version`)))"; @@ -411,7 +400,7 @@ public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + "999999999 " + - "FROM `mydb`.`staging` as stage " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; @@ -425,31 +414,24 @@ 
public void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( } @Override - public void verifyUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations) + public void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = " + - "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA " + - "as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1 " + - "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + - "(EXISTS (SELECT * FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` " + - "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + - "ROW_NUMBER() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`VERSION` DESC) " + - "as `LEGEND_PERSISTENCE_ROW_NUM` FROM `MYDB`.`STAGING` as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_ROW_NUM` = 1) as stage " + - "WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (stage.`VERSION` >= sink.`VERSION`)))"; - - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " + + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink " + + "SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1 WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + + "(EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + + "AND (sink.`NAME` = stage.`NAME`)) AND 
(stage.`VERSION` >= sink.`VERSION`)))"; + + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `VERSION`, `BATCH_ID_IN`, `BATCH_ID_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION` " + - "FROM (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,stage.`VERSION`," + - "ROW_NUMBER() OVER (PARTITION BY stage.`ID`,stage.`NAME` ORDER BY stage.`VERSION` DESC) as `LEGEND_PERSISTENCE_ROW_NUM` " + - "FROM `MYDB`.`STAGING` as stage) as stage WHERE stage.`LEGEND_PERSISTENCE_ROW_NUM` = 1) as stage " + - "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) " + - "AND (stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999 FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND " + + "(stage.`VERSION` < sink.`VERSION`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); @@ -481,7 +463,7 @@ protected String getExpectedMetadataTableIngestQueryWithStagingFilters(String st "(`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `staging_filters`) " + "(SELECT 'main',(SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "'2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'DONE'," + + "'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE'," + String.format("PARSE_JSON('%s'))", stagingFilters); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 53d5ea24e7e..02a21455a2b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -27,7 +27,7 @@ public class UnitemporalDeltaBatchIdDateTimeBasedTest extends UnitmemporalDeltaBatchIdDateTimeBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -35,7 +35,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -45,7 +45,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -68,13 +68,13 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM 
`mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -82,8 +82,8 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -101,7 +101,7 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -119,7 +119,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -131,7 +131,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE 
(sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -154,7 +154,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(Generator } @Override - public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalDeltaWithDeleteInd(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -162,7 +162,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -174,7 +174,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -189,13 +189,13 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper } 
@Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE " + "((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -203,7 +203,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + @@ -236,8 +236,8 @@ public void verifyUnitemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00' WHERE (sink.`BATCH_ID_OUT` = 999999999) 
AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00.000000' WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND 
(sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); @@ -254,7 +254,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`)))"; @@ -263,7 +263,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ -329,7 +329,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) String expectedMilestoneQuery = "UPDATE `my_schema`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM 
batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `my_schema`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -339,7 +339,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `my_schema`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `my_schema`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -374,7 +374,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati String expectedMilestoneQuery = "UPDATE `mydb`.`my_schema`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`my_schema`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -384,7 +384,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) 
" + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`my_schema`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`my_schema`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -419,7 +419,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper String expectedMilestoneQuery = "UPDATE main as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM staging as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -429,7 +429,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM staging as stage " + "WHERE NOT (EXISTS (SELECT * FROM main as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java index 9d4aa02c54f..ec8d442fc5e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java @@ -27,14 +27,14 @@ public class UnitemporalDeltaDateTimeBasedTest extends UnitmemporalDeltaDateTimeBasedTestCases { @Override - public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations) + public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -43,7 +43,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + 
"(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + @@ -58,29 +58,29 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio // Stats String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00'"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000'"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00') as `rowsInserted`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000') as `rowsInserted`"; String rowsTerminated = "SELECT 0 as `rowsTerminated`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @Override - public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' 
" + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + - "FROM `mydb`.`staging` as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + @@ -98,23 +98,23 @@ public void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -125,7 +125,7 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper "(`id`, `name`, `amount`, `biz_date`, `digest`, " + "`batch_time_in`, `batch_time_out`) " + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -140,21 +140,21 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult oper // Stats String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00')))"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00')))) as `rowsInserted`"; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` 
= '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00')))) as `rowsTerminated`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00.000000')))) as `rowsInserted`"; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00.000000')))) as `rowsTerminated`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @Override - public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) + public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE " + "(sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + - "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND 
(sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -162,7 +162,7 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND (sink.`digest` = stage.`digest`) " + @@ -181,11 +181,11 @@ public void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00' WHERE (sink.`BATCH_TIME_OUT` = '9999-12-31 23:59:59') AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00.000000' WHERE (sink.`BATCH_TIME_OUT` = '9999-12-31 23:59:59') AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_TIME_OUT` = '9999-12-31 23:59:59') AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT 
stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_TIME_OUT` = '9999-12-31 23:59:59') AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableTimeBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java index e1624d0c58d..81c42dab355 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java @@ -14,15 +14,15 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; -import 
org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalSnapshotBatchIdBasedTestCases; import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBatchIdBasedTestCases { @@ -33,7 +33,7 @@ public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBat String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1)-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))))) as `rowsTerminated`"; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -62,6 +62,41 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void 
verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + + "WHERE (sink.`batch_id_out` = 999999999) " + + "AND (NOT (EXISTS " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedStagingTableWithDigestCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(2)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + 
Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + } + @Override public void verifyUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling(GeneratorResult operations) { @@ -92,7 +127,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -122,7 +157,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 22dfbe78a9f..2112ff4dedc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -14,15 +14,15 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; -import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalSnapshotBatchIdDateTimeBasedTestCases; import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSnapshotBatchIdDateTimeBasedTestCases { @@ -33,14 +33,14 @@ public class UnitemporalSnapshotBatchIdDateTimeBasedTest extends UnitmemporalSna String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM 
batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1)-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_id_in` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'))))) as `rowsTerminated`"; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -48,7 +48,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, 
`batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; @@ -62,6 +62,42 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + + "WHERE (sink.`batch_id_out` = 999999999) " + + "AND (NOT (EXISTS " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, 
`amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + } + @Override public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations) { @@ -69,7 +105,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBa List milestoningSql = operations.ingestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE sink.`batch_id_out` = 999999999"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -78,31 +114,49 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBa } @Override - public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = " + + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE " + + "UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00.000000' WHERE " + + "(sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE " + + "((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; + + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) " + + 
"(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + + "(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + + "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink " + + "WHERE sink.`BATCH_ID_OUT` = 999999999)))"; - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00' WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `MYDB`.`STAGING` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink WHERE sink.`BATCH_ID_OUT` = 999999999)))"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQueryInUpperCase, deduplicationAndVersioningSql.get(0)); + 
Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase, deduplicationAndVersioningSql.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlUpperCase, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithUpperCase(), metadataIngestSql.get(0)); } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -111,7 +165,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND (sink.`biz_date` = stage.`biz_date`))))"; @@ -139,14 +193,14 @@ public void verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(G } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -155,7 +209,7 @@ public void 
verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; @@ -175,7 +229,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmp List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; @@ -201,7 +255,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE 
`mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -209,7 +263,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index 6dee33f7e13..0f96eeb95b6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -14,34 +14,34 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics; import org.finos.legend.engine.persistence.components.relational.RelationalSink; -import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalSnapshotDateTimeBasedTestCases; import org.junit.jupiter.api.Assertions; import java.util.List; +import java.util.Map; public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDateTimeBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = 
sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00')))"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00.000000')))"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00')))) as `rowsInserted`"; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00')))) as `rowsTerminated`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00.000000') AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00.000000')))) as `rowsInserted`"; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '2000-01-01 00:00:00.000000')-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '2000-01-01 00:00:00.000000') AND 
(EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = '2000-01-01 00:00:00.000000')))) as `rowsTerminated`"; @Override - public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage " + @@ -50,7 +50,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '9999-12-31 23:59:59')))"; @@ -63,6 +63,44 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List deduplicationAndVersioningSql 
= operations.deduplicationAndVersioningSql(); + Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + + "AND (NOT (EXISTS " + + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = '9999-12-31 23:59:59')))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); + + Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, 
deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + } + @Override public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling(GeneratorResult operations) { @@ -71,7 +109,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandli List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE sink.`batch_time_out` = '9999-12-31 23:59:59'"; Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); @@ -89,7 +127,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET " + - "sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00' " + + "sink.`BATCH_TIME_OUT` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`BATCH_TIME_OUT` = '9999-12-31 23:59:59') AND " + "(NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; @@ -97,7 +135,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `MYDB`.`STAGING` as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM `MYDB`.`STAGING` as stage " + "WHERE NOT (stage.`DIGEST` IN (SELECT 
sink.`DIGEST` FROM `MYDB`.`MAIN` as sink " + "WHERE sink.`BATCH_TIME_OUT` = '9999-12-31 23:59:59')))"; @@ -110,14 +148,14 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene } @Override - public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "SET sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -126,7 +164,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND (sink.`biz_date` = stage.`biz_date`))))"; @@ -140,14 +178,14 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations) { List preActionsSql = 
operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "sink.`batch_time_out` = '2000-01-01 00:00:00.000000' " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) AND " + @@ -156,7 +194,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "'2000-01-01 00:00:00','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + + "'2000-01-01 00:00:00.000000','9999-12-31 23:59:59' FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_time_out` = '9999-12-31 23:59:59') AND " + "(sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java index 616630967cb..1cb69388daf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java @@ -17,10 +17,10 @@ public class SnowflakeTestArtifacts { public static String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\")" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00',SYSDATE(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00.000000',SYSDATE(),'DONE')"; public static String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (\"TABLE_NAME\", \"TABLE_BATCH_ID\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\")" + - " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00',SYSDATE(),'DONE')"; + " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00.000000',SYSDATE(),'DONE')"; public static String expectedMetadataTableCreateQuery = "CREATE TABLE IF NOT EXISTS batch_metadata" + "(\"table_name\" VARCHAR(255)," + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index dc739b05cbc..70529e54d7e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -116,12 +116,12 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + "(\"col_int\", \"col_integer\", \"batch_id\", \"append_time\") " + "FROM " + - "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\",{NEXT_BATCH_ID},'2000-01-01 00:00:00' " + + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\",{NEXT_BATCH_ID},'2000-01-01 00:00:00.000000' " + "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage)" + " on_error = 'ABORT_STATEMENT'"; String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + - "(SELECT {NEXT_BATCH_ID},'my_name','2000-01-01 00:00:00',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"task123\"}'))"; + "(SELECT {NEXT_BATCH_ID},'my_name','2000-01-01 
00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"task123\"}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -130,7 +130,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as \"rowsUpdated\"", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); } @Test @@ -231,14 +231,14 @@ public void testBulkLoadWithUpperCaseConversionAndNoTaskId() "FROM " + "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('COL_INT',legend_persistence_stage.$1,'COL_INTEGER',legend_persistence_stage.$2))," + - "(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME'),'2000-01-01 00:00:00' " + + "(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME'),'2000-01-01 00:00:00.000000' " + "FROM my_location (FILE_FORMAT => 'my_file_format', " + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; String 
expectedMetadataIngestSql = "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + "(SELECT (SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME')," + - "'MY_NAME','2000-01-01 00:00:00',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + "'MY_NAME','2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -247,7 +247,7 @@ public void testBulkLoadWithUpperCaseConversionAndNoTaskId() Assertions.assertEquals("SELECT 0 as \"ROWSDELETED\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"ROWSTERMINATED\"", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as \"ROWSUPDATED\"", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"MY_DB\".\"MY_NAME\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"MY_DB\".\"MY_NAME\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); } @Test @@ -368,7 +368,7 @@ public void testBulkLoadWithDigest() "FROM " + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\"," + "LAKEHOUSE_UDF(OBJECT_CONSTRUCT('col_int',legend_persistence_stage.$1,'col_integer',legend_persistence_stage.$2))," + - "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as 
bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME'),'2000-01-01 00:00:00' " + + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME'),'2000-01-01 00:00:00.000000' " + "FROM my_location (FILE_FORMAT => 'my_file_format', " + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; @@ -379,6 +379,6 @@ public void testBulkLoadWithDigest() Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as \"rowsUpdated\"", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java index 164f830f123..2b0010d6365 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java @@ -52,7 +52,7 @@ public class IngestModeTest String[] partitionKeys = new String[]{"biz_date"}; HashMap> partitionFilter = new HashMap>() {{ - put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00", "2000-01-02 00:00:00"))); + put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00.000000", "2000-01-02 00:00:00"))); }}; // Base Columns: Primary keys : id, name @@ -104,10 +104,10 @@ public class IngestModeTest "\"TABLE_BATCH_ID\" INTEGER)"; protected String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\")" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00',SYSDATE(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00.000000',SYSDATE(),'DONE')"; protected String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (\"TABLE_NAME\", \"TABLE_BATCH_ID\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\")" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.\"TABLE_NAME\" = 'main'),'2000-01-01 00:00:00',SYSDATE(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"TABLE_BATCH_ID\"),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.\"TABLE_NAME\" = 'main'),'2000-01-01 00:00:00.000000',SYSDATE(),'DONE')"; String expectedMainTableCreateQuery = "CREATE TABLE IF NOT EXISTS 
\"mydb\".\"main\"" + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java index 0c915338f49..49ffe9bfc6e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java @@ -35,7 +35,7 @@ public RelationalSink getRelationalSink() } @Override - public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -65,13 +65,13 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio } @Override - public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operations) + public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + - "USING \"mydb\".\"staging\" as stage " + + "USING \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "ON (sink.\"id\" = stage.\"id\") AND 
(sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + "THEN UPDATE SET " + @@ -80,10 +80,10 @@ public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operat "sink.\"amount\" = stage.\"amount\"," + "sink.\"biz_date\" = stage.\"biz_date\"," + "sink.\"digest\" = stage.\"digest\"," + - "sink.\"batch_update_time\" = '2000-01-01 00:00:00' " + + "sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000' " + "WHEN NOT MATCHED THEN INSERT " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "VALUES (stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00')"; + "VALUES (stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000')"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(mergeSql, milestoningSqlList.get(0)); @@ -95,7 +95,31 @@ public void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operat } @Override - public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges) + { + String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + + "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + + "as stage ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + + "THEN UPDATE SET sink.\"id\" = stage.\"id\",sink.\"name\" = stage.\"name\",sink.\"amount\" = stage.\"amount\",sink.\"biz_date\" = 
stage.\"biz_date\",sink.\"digest\" = stage.\"digest\" " + + "WHEN NOT MATCHED " + + "THEN INSERT (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + + "VALUES (stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\")"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, operations.get(0).preActionsSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); + + // Stats + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempStagingTable, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsTempStagingTable, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + + @Override + public void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage " + @@ -119,31 +143,31 @@ public void verifyNonTemporalDeltaNoAuditingWithDataSplit(List } @Override - public void verifyNonTemporalDeltaWithWithAuditingWithDataSplit(List operations, List dataSplitRanges) + public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges) { String mergeSql = "MERGE INTO \"mydb\".\"main\" as 
sink " + - "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage " + + "USING (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + - "THEN UPDATE SET sink.\"id\" = stage.\"id\",sink.\"name\" = stage.\"name\",sink.\"amount\" = stage.\"amount\",sink.\"biz_date\" = stage.\"biz_date\",sink.\"digest\" = stage.\"digest\",sink.\"batch_update_time\" = '2000-01-01 00:00:00' " + + "THEN UPDATE SET sink.\"id\" = stage.\"id\",sink.\"name\" = stage.\"name\",sink.\"amount\" = stage.\"amount\",sink.\"biz_date\" = stage.\"biz_date\",sink.\"digest\" = stage.\"digest\",sink.\"batch_update_time\" = '2000-01-01 00:00:00.000000' " + "WHEN NOT MATCHED " + "THEN INSERT (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + - "VALUES (stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00')"; + "VALUES (stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",'2000-01-01 00:00:00.000000')"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(1)), operations.get(1).ingestSql().get(0)); // Stats - Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(0)), 
operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); - Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplits, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsWithDuplicates, dataSplitRanges.get(0)), operations.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCountWithSplitsWithDuplicates, dataSplitRanges.get(1)), operations.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsTerminated, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); Assertions.assertEquals(rowsDeleted, operations.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); } @Override - public void verifyNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator(GeneratorResult operations) + public void verifyNontemporalDeltaNoAuditingWithDeleteIndicatorNoDedupNoVersioning(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -242,15 +266,14 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult } @Override - public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(GeneratorResult operations) + public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + "USING " + - "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\" FROM " + - "(SELECT 
stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\",ROW_NUMBER() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_row_num\" FROM \"mydb\".\"staging\" as stage WHERE stage.\"snapshot_id\" > 18972) as stage WHERE stage.\"legend_persistence_row_num\" = 1) as stage " + + "\"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + "ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + "WHEN MATCHED AND stage.\"version\" > sink.\"version\" " + "THEN UPDATE SET sink.\"id\" = stage.\"id\",sink.\"name\" = stage.\"name\",sink.\"amount\" = stage.\"amount\",sink.\"biz_date\" = stage.\"biz_date\",sink.\"digest\" = stage.\"digest\",sink.\"version\" = stage.\"version\" " + @@ -267,7 +290,7 @@ public void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(Ge } @Override - public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilters(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -291,7 +314,7 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(Gene } @Override - public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations) + public void verifyNontemporalDeltaNoDedupMaxVersionWithoutPerform(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); @@ -314,15 +337,14 @@ public void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters( } @Override - public void verifyNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations) + public void 
verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(GeneratorResult operations) { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); String mergeSql = "MERGE INTO \"MYDB\".\"MAIN\" as sink " + "USING " + - "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\" FROM " + - "(SELECT stage.\"ID\",stage.\"NAME\",stage.\"AMOUNT\",stage.\"BIZ_DATE\",stage.\"DIGEST\",stage.\"VERSION\",ROW_NUMBER() OVER (PARTITION BY stage.\"ID\",stage.\"NAME\" ORDER BY stage.\"VERSION\" DESC) as \"LEGEND_PERSISTENCE_ROW_NUM\" FROM \"MYDB\".\"STAGING\" as stage) as stage WHERE stage.\"LEGEND_PERSISTENCE_ROW_NUM\" = 1) as stage " + + "\"MYDB\".\"STAGING_LEGEND_PERSISTENCE_TEMP_STAGING\" as stage " + "ON (sink.\"ID\" = stage.\"ID\") AND (sink.\"NAME\" = stage.\"NAME\") " + "WHEN MATCHED AND stage.\"VERSION\" >= sink.\"VERSION\" " + "THEN UPDATE SET sink.\"ID\" = stage.\"ID\",sink.\"NAME\" = stage.\"NAME\",sink.\"AMOUNT\" = stage.\"AMOUNT\",sink.\"BIZ_DATE\" = stage.\"BIZ_DATE\",sink.\"DIGEST\" = stage.\"DIGEST\",sink.\"VERSION\" = stage.\"VERSION\" " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index c6025c62f8f..02e3c0512a8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -56,7 +56,7 @@ protected String getExpectedMetadataTableIngestQueryWithStagingFilters(String st "(\"table_name\", \"table_batch_id\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"staging_filters\") " + "(SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')," + - "'2000-01-01 00:00:00',SYSDATE(),'DONE'," + + "'2000-01-01 00:00:00.000000',SYSDATE(),'DONE'," + String.format("PARSE_JSON('%s'))", stagingFilters); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java index 4a5a9dd4992..820d4783d74 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java @@ -24,13 +24,13 @@ public String getExpectedSqlForMetadata() { return "INSERT INTO bulk_load_batch_metadata " + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", 
\"batch_status\", \"batch_source_info\") " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name','2000-01-01 00:00:00.000000',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; } public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"TABLE_NAME\") = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"TABLE_NAME\") = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00.000000',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index 7ba5fea6187..adf0d700745 100644 
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -75,6 +75,7 @@ public class BaseTest protected String digestField = "digest"; protected String versionField = "version"; + protected String bizDateField = "biz_date"; protected String snapshotIdField = "snapshot_id"; protected String dataSplitField = "data_split"; protected String batchUpdateTimeField = "batch_update_time"; @@ -95,7 +96,6 @@ public class BaseTest {{ put("biz_date", new HashSet<>(Arrays.asList("2000-01-01 00:00:00", "2000-01-02 00:00:00"))); }}; - protected String[] bitemporalPartitionKeys = new String[]{validityFromReferenceField}; // Base Columns: Primary keys : id, name protected Field id = Field.builder().name("id").type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())).primaryKey(true).build(); @@ -154,48 +154,12 @@ public class BaseTest .addFields(bizDate) .build(); - protected SchemaDefinition baseTableSchemaWithDataSplit = SchemaDefinition.builder() - .addFields(id) - .addFields(name) - .addFields(amount) - .addFields(bizDate) - .addFields(dataSplit) - .build(); - protected SchemaDefinition baseTableShortenedSchema = SchemaDefinition.builder() .addFields(id) .addFields(name) .addFields(amount) .build(); - protected SchemaDefinition stagingTableEvolvedSize = SchemaDefinition.builder() - .addFields(id) - .addFields(nameModified) - .addFields(amount) - .addFields(bizDate) - .build(); - - protected SchemaDefinition stagingTableImplicitDatatypeChange = SchemaDefinition.builder() - .addFields(id) - .addFields(name) - .addFields(floatAmount) - .addFields(bizDate) - .build(); - - protected SchemaDefinition 
stagingTableNonBreakingDatatypeChange = SchemaDefinition.builder() - .addFields(tinyIntId) - .addFields(name) - .addFields(amount) - .addFields(bizDate) - .build(); - - protected SchemaDefinition stagingTableBreakingDatatypeChange = SchemaDefinition.builder() - .addFields(tinyIntString) - .addFields(name) - .addFields(amount) - .addFields(bizDate) - .build(); - protected SchemaDefinition mainTableSchemaWithBatchIdAndTime = SchemaDefinition.builder() .addFields(id) .addFields(name) @@ -331,15 +295,6 @@ public class BaseTest .addFields(batchUpdateTime) .build(); - protected SchemaDefinition baseTableSchemaWithUpdateBatchTimeFieldNotPk = SchemaDefinition.builder() - .addFields(id) - .addFields(name) - .addFields(amount) - .addFields(bizDate) - .addFields(digest) - .addFields(batchUpdateTimeNonPK) - .build(); - protected SchemaDefinition stagingTableSchemaWithLimitedColumns = SchemaDefinition.builder() .addFields(id) .addFields(name) @@ -356,16 +311,6 @@ public class BaseTest .addFields(deleteIndicator) .build(); - protected SchemaDefinition stagingTableSchemaWithDeleteIndicatorWithDataSplit = SchemaDefinition.builder() - .addFields(id) - .addFields(name) - .addFields(amount) - .addFields(bizDate) - .addFields(digest) - .addFields(deleteIndicator) - .addFields(dataSplit) - .build(); - protected SchemaDefinition stagingTableSchemaWithBooleanDeleteIndicator = SchemaDefinition.builder() .addFields(id) .addFields(name) @@ -386,11 +331,12 @@ public class BaseTest .addFields(validityThroughTarget) .build(); - protected SchemaDefinition bitemporalMainTableSchemaWithBatchIdAndTime = SchemaDefinition.builder() + protected SchemaDefinition bitemporalMainTableSchemaWithVersionBatchIdAndTime = SchemaDefinition.builder() .addFields(id) .addFields(name) .addFields(amount) .addFields(digest) + .addFields(version) .addFields(batchIdIn) .addFields(batchIdOut) .addFields(batchTimeInNonPrimary) @@ -399,11 +345,12 @@ public class BaseTest .addFields(validityThroughTarget) .build(); - 
protected SchemaDefinition bitemporalMainTableSchemaWithDateTime = SchemaDefinition.builder() + protected SchemaDefinition bitemporalMainTableSchemaWithVersionBatchDateTime = SchemaDefinition.builder() .addFields(id) .addFields(name) .addFields(amount) .addFields(digest) + .addFields(version) .addFields(batchTimeIn) .addFields(batchTimeOut) .addFields(validityFromTarget) @@ -421,6 +368,18 @@ public class BaseTest .addFields(validityThroughTarget) .build(); + protected SchemaDefinition bitemporalFromOnlyMainTableWithVersionSchema = SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(amount) + .addFields(digest) + .addFields(version) + .addFields(batchIdIn) + .addFields(batchIdOut) + .addFields(validityFromTarget) + .addFields(validityThroughTarget) + .build(); + protected SchemaDefinition bitemporalFromOnlyMainTableBatchIdAndTimeBasedSchema = SchemaDefinition.builder() .addFields(id) .addFields(name) @@ -454,13 +413,14 @@ public class BaseTest .addFields(digest) .build(); - protected SchemaDefinition bitemporalStagingTableSchemaWithDataSplit = SchemaDefinition.builder() + protected SchemaDefinition bitemporalStagingTableSchemaWithVersionWithDataSplit = SchemaDefinition.builder() .addFields(id) .addFields(name) .addFields(amount) .addFields(validityFromReference) .addFields(validityThroughReference) .addFields(digest) + .addFields(version) .addFields(dataSplit) .build(); @@ -474,13 +434,14 @@ public class BaseTest .addFields(deleteIndicator) .build(); - protected SchemaDefinition bitemporalStagingTableSchemaWithDeleteIndicatorAndDataSplit = SchemaDefinition.builder() + protected SchemaDefinition bitemporalStagingTableSchemaWithDeleteIndicatorVersionAndDataSplit = SchemaDefinition.builder() .addFields(id) .addFields(name) .addFields(amount) .addFields(validityFromReference) .addFields(validityThroughReference) .addFields(digest) + .addFields(version) .addFields(dataSplit) .addFields(deleteIndicator) .build(); @@ -493,12 +454,13 @@ public class 
BaseTest .addFields(digest) .build(); - protected SchemaDefinition bitemporalFromOnlyStagingTableSchemaWithDataSplit = SchemaDefinition.builder() + protected SchemaDefinition bitemporalFromOnlyStagingTableSchemaWithVersionWithDataSplit = SchemaDefinition.builder() .addFields(id) .addFields(name) .addFields(amount) .addFields(validityFromReference) .addFields(digest) + .addFields(version) .addFields(dataSplit) .build(); @@ -511,12 +473,13 @@ public class BaseTest .addFields(deleteIndicator) .build(); - protected SchemaDefinition bitemporalFromOnlyStagingTableSchemaWithDeleteIndicatorWithDataSplit = SchemaDefinition.builder() + protected SchemaDefinition bitemporalFromOnlyStagingTableSchemaWithDeleteIndicatorWithVersionWithDataSplit = SchemaDefinition.builder() .addFields(id) .addFields(name) .addFields(amount) .addFields(validityFromReference) .addFields(digest) + .addFields(version) .addFields(deleteIndicator) .addFields(dataSplit) .build(); @@ -532,6 +495,18 @@ public class BaseTest .addFields(validityThroughTarget) .build(); + protected SchemaDefinition bitemporalFromOnlyTempTableWithVersionSchema = SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(amount) + .addFields(digest) + .addFields(version) + .addFields(batchIdIn) + .addFields(batchIdOut) + .addFields(validityFromTarget) + .addFields(validityThroughTarget) + .build(); + protected SchemaDefinition bitemporalFromOnlyTempTableWithDeleteIndicatorSchema = SchemaDefinition.builder() .addFields(id) .addFields(name) @@ -652,21 +627,11 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(baseTableSchemaWithUpdateBatchTimeField) .build(); - protected Dataset mainTableWithBaseSchemaHavingAuditFieldNotPk = DatasetDefinition.builder() - .database(mainDbName).name(mainTableName).alias(mainTableAlias) - .schema(baseTableSchemaWithUpdateBatchTimeFieldNotPk) - .build(); - protected Dataset stagingTableWithBaseSchemaHavingDigestAndDataSplit = 
DatasetDefinition.builder() .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) .schema(baseTableSchemaWithDigestAndDataSplit) .build(); - protected Dataset stagingTableWithBaseSchemaHavingDataSplit = DatasetDefinition.builder() - .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) - .schema(baseTableSchemaWithDataSplit) - .build(); - protected Dataset mainTableWithBatchIdBasedSchema = DatasetDefinition.builder() .database(mainDbName).name(mainTableName).alias(mainTableAlias) .schema(mainTableBatchIdBasedSchema) @@ -692,13 +657,6 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(stagingTableSchemaWithBooleanDeleteIndicator) .build(); - protected Dataset stagingTableWithDeleteIndicatorWithDataSplit = DatasetDefinition.builder() - .database(stagingDbName) - .name(stagingTableName) - .alias(stagingTableAlias) - .schema(stagingTableSchemaWithDeleteIndicatorWithDataSplit) - .build(); - protected Dataset mainTableWithBatchIdAndTime = DatasetDefinition.builder() .database(mainDbName).name(mainTableName).alias(mainTableAlias) .schema(mainTableSchemaWithBatchIdAndTime) @@ -714,9 +672,9 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(bitemporalMainTableSchema) .build(); - protected Dataset mainTableWithBitemporalSchemaWithDateTime = DatasetDefinition.builder() + protected Dataset mainTableWithBitemporalSchemaWithVersionBatchDateTime = DatasetDefinition.builder() .database(mainDbName).name(mainTableName).alias(mainTableAlias) - .schema(bitemporalMainTableSchemaWithDateTime) + .schema(bitemporalMainTableSchemaWithVersionBatchDateTime) .build(); protected Dataset stagingTableWithBitemporalSchema = DatasetDefinition.builder() @@ -731,21 +689,21 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(bitemporalStagingTableSchemaWithDeleteIndicator) .build(); - protected Dataset 
stagingTableWithBitemporalSchemaWithDeleteIndicatorAndDataSplit = DatasetDefinition.builder() + protected Dataset stagingTableWithBitemporalSchemaWithDeleteIndicatorVersionAndDataSplit = DatasetDefinition.builder() .database(stagingDbName) .name(stagingTableName) .alias(stagingTableAlias) - .schema(bitemporalStagingTableSchemaWithDeleteIndicatorAndDataSplit) + .schema(bitemporalStagingTableSchemaWithDeleteIndicatorVersionAndDataSplit) .build(); - protected Dataset mainTableWithBitemporalSchemaWithBatchIdAndTime = DatasetDefinition.builder() + protected Dataset mainTableWithBitemporalSchemaWithVersionBatchIdAndTime = DatasetDefinition.builder() .database(mainDbName).name(mainTableName).alias(mainTableAlias) - .schema(bitemporalMainTableSchemaWithBatchIdAndTime) + .schema(bitemporalMainTableSchemaWithVersionBatchIdAndTime) .build(); - protected Dataset stagingTableWithBitemporalSchemaWithDataSplit = DatasetDefinition.builder() + protected Dataset stagingTableWithBitemporalSchemaWithVersionWithDataSplit = DatasetDefinition.builder() .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) - .schema(bitemporalStagingTableSchemaWithDataSplit) + .schema(bitemporalStagingTableSchemaWithVersionWithDataSplit) .build(); protected DatasetDefinition mainTableWithBitemporalFromOnlySchema = DatasetDefinition.builder() @@ -755,6 +713,13 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(bitemporalFromOnlyMainTableSchema) .build(); + protected DatasetDefinition mainTableWithBitemporalFromOnlyWithVersionSchema = DatasetDefinition.builder() + .database(mainDbName) + .name(mainTableName) + .alias(mainTableAlias) + .schema(bitemporalFromOnlyMainTableWithVersionSchema) + .build(); + protected DatasetDefinition mainTableWithBitemporalFromOnlyWithBatchIdAndTimeBasedSchema = DatasetDefinition.builder() .database(mainDbName) .name(mainTableName) @@ -790,11 +755,11 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange 
dataSplitRan .schema(bitemporalFromOnlyStagingTableSchema) .build(); - protected DatasetDefinition stagingTableWithBitemporalFromOnlySchemaWithDataSplit = DatasetDefinition.builder() + protected DatasetDefinition stagingTableWithBitemporalFromOnlySchemaWithVersionWithDataSplit = DatasetDefinition.builder() .database(stagingDbName) .name(stagingTableName) .alias(stagingTableAlias) - .schema(bitemporalFromOnlyStagingTableSchemaWithDataSplit) + .schema(bitemporalFromOnlyStagingTableSchemaWithVersionWithDataSplit) .build(); protected DatasetDefinition tempTableWithBitemporalFromOnlySchema = DatasetDefinition.builder() @@ -804,6 +769,13 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(bitemporalFromOnlyTempTableSchema) .build(); + protected DatasetDefinition tempTableWithBitemporalFromOnlyWithVersionSchema = DatasetDefinition.builder() + .database(tempDbName) + .name(tempTableName) + .alias(tempTableAlias) + .schema(bitemporalFromOnlyTempTableWithVersionSchema) + .build(); + protected DatasetDefinition stagingTableWithBitemporalFromOnlySchemaWithDeleteInd = DatasetDefinition.builder() .database(stagingDbName) .name(stagingTableName) @@ -811,11 +783,11 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(bitemporalFromOnlyStagingTableSchemaWithDeleteIndicator) .build(); - protected DatasetDefinition stagingTableWithBitemporalFromOnlySchemaWithDeleteIndWithDataSplit = DatasetDefinition.builder() + protected DatasetDefinition stagingTableWithBitemporalFromOnlySchemaWithDeleteIndWithVersionWithDataSplit = DatasetDefinition.builder() .database(stagingDbName) .name(stagingTableName) .alias(stagingTableAlias) - .schema(bitemporalFromOnlyStagingTableSchemaWithDeleteIndicatorWithDataSplit) + .schema(bitemporalFromOnlyStagingTableSchemaWithDeleteIndicatorWithVersionWithDataSplit) .build(); protected DatasetDefinition stagingTableBitemporalWithoutDuplicates = DatasetDefinition.builder() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java index 71a68981a46..3c71d88d913 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java @@ -21,8 +21,10 @@ import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; - -import java.util.Optional; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; public class AppendOnlyScenarios extends BaseTest { @@ -31,123 +33,198 @@ public class AppendOnlyScenarios extends BaseTest Test Scenarios for Non-temporal Delta Variables: 1) Auditing: No Auditing, With Auditing - 2) DataSplit: Enabled, Disabled - 3) DeduplicationStrategy: Allow_Duplicates, Filter Duplicates, Fail on Duplicates - - 
Valid Combinations: - 1) Allow_Duplicates, No Auditing - 2) Allow_Duplicates, With Auditing - 3) Allow_Duplicates, With Auditing, With Data Splits + 2) Versioning: NoVersion, MaxVersion, AllVersion + 3) Deduplication: Allow Duplicates, Filter Duplicates, Fail on Duplicates + 4) filterExistingRecords: true / false - 4) Fail on Duplicates, No Auditing - 5) Fail on Duplicates, With Auditing - 6) Filter Duplicates, No Auditing - 7) Filter Duplicates, With Auditing - 8) Filter Duplicates, With Auditing, With Data Splits - - Invalid Combinations - 1) Any Deduplication Strategy, No Auditing, With Data Splits - 2) Fail on Duplicates, With Data Splits + Valid Combinations: + NoVersion: + 1) With Auditing, NoVersion, Allow Duplicates, true + 2) With Auditing, NoVersion, Filter Duplicates, true - tested (perform deduplication, auditing, filter existing) + 3) With Auditing, NoVersion, Fail on Duplicates, true + 4) No Auditing, NoVersion, Allow Duplicates, false - tested (the most basic case) + 5) With Auditing, NoVersion, Allow Duplicates, false + 6) No Auditing, NoVersion, Filter Duplicates, false + 7) With Auditing, NoVersion, Filter Duplicates, false + 8) No Auditing, NoVersion, Fail on Duplicates, false + 9) With Auditing, NoVersion, Fail on Duplicates, false + + MaxVersion: + 10) With Auditing, MaxVersion, Allow Duplicates, true + 11) With Auditing, MaxVersion, Filter Duplicates, true + 12) With Auditing, MaxVersion, Fail on Duplicates, true - tested (perform deduplication and versioning, auditing, filter existing) + 13) With Auditing, MaxVersion, Allow Duplicates, false + 14) With Auditing, MaxVersion, Filter Duplicates, false - tested (perform deduplication and versioning, auditing) + 15) With Auditing, MaxVersion, Fail on Duplicates, false + + AllVersion: + 16) With Auditing, AllVersion, Allow Duplicates, true + 17) With Auditing, AllVersion, Filter Duplicates, true - tested (perform deduplication and versioning, data split, auditing, filter existing) + 18) With 
Auditing, AllVersion, Fail on Duplicates, true + 19) With Auditing, AllVersion, Allow Duplicates, false + 20) With Auditing, AllVersion, Filter Duplicates, false + 21) With Auditing, AllVersion, Fail on Duplicates, false - tested (perform deduplication and versioning, data split, auditing) + + + Invalid Combinations: + NoAuditing + MaxVersion/AllVersion: + 22) No Auditing, MaxVersion, Allow Duplicates, true + 23) No Auditing, MaxVersion, Filter Duplicates, true + 24) No Auditing, MaxVersion, Fail on Duplicates, true + 25) No Auditing, MaxVersion, Allow Duplicates, false + 26) No Auditing, MaxVersion, Filter Duplicates, false + 27) No Auditing, MaxVersion, Fail on Duplicates, false + 28) No Auditing, AllVersion, Allow Duplicates, true + 29) No Auditing, AllVersion, Filter Duplicates, true + 30) No Auditing, AllVersion, Fail on Duplicates, true + 31) No Auditing, AllVersion, Allow Duplicates, false + 32) No Auditing, AllVersion, Filter Duplicates, false - tested + 33) No Auditing, AllVersion, Fail on Duplicates, false + + NoAuditing + filterExistingRecords + 34) No Auditing, NoVersion, Allow Duplicates, true - tested + 35) No Auditing, NoVersion, Filter Duplicates, true + 36) No Auditing, NoVersion, Fail on Duplicates, true */ - public TestScenario ALLOW_DUPLICATES_NO_AUDITING() + public TestScenario NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() .digestField(digestField) .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(NoAuditing.builder().build()) + .filterExistingRecords(false) .build(); return new TestScenario(mainTableWithNoPrimaryKeys, stagingTableWithNoPrimaryKeys, ingestMode); } - public TestScenario ALLOW_DUPLICATES_NO_AUDITING_DERIVE_MAIN_SCHEMA() + public TestScenario NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS__DERIVE_MAIN_SCHEMA() { - TestScenario scenario = 
ALLOW_DUPLICATES_NO_AUDITING(); + TestScenario scenario = NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS(); scenario.setMainTable(mainTableWithNoFields); return scenario; } - public TestScenario ALLOW_DUPLICATES_WITH_AUDITING() + public TestScenario WITH_AUDITING__FILTER_DUPS__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) - .deduplicationStrategy(AllowDuplicates.builder().build()) - .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) - .build(); - return new TestScenario(mainTableWithNoPrimaryKeysHavingAuditField, stagingTableWithNoPrimaryKeys, ingestMode); + .digestField(digestField) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .filterExistingRecords(true) + .build(); + return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario ALLOW_DUPLICATES_WITH_AUDITING__WITH_DATASPLIT() + public TestScenario WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() .digestField(digestField) - .deduplicationStrategy(AllowDuplicates.builder().build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(bizDateField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) - .dataSplitField(Optional.of(dataSplitField)) + .filterExistingRecords(false) .build(); - return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); + return 
new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario FAIL_ON_DUPLICATES_NO_AUDITING() + // failure case + public TestScenario NO_AUDITING__FILTER_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() .digestField(digestField) - .deduplicationStrategy(FailOnDuplicates.builder().build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(bizDateField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) .auditing(NoAuditing.builder().build()) + .filterExistingRecords(false) .build(); return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario FAIL_ON_DUPLICATES_WITH_AUDITING() + public TestScenario WITH_AUDITING__FILTER_DUPS__ALL_VERSION__WITH_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() .digestField(digestField) - .deduplicationStrategy(FailOnDuplicates.builder().build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(bizDateField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .filterExistingRecords(true) .build(); - return new TestScenario(mainTableWithBaseSchemaHavingAuditFieldNotPk, stagingTableWithBaseSchema, ingestMode); + return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario FILTER_DUPLICATES_NO_AUDITING() + public TestScenario WITH_AUDITING__FAIL_ON_DUPS__MAX_VERSION__WITH_FILTER_EXISTING_RECORDS() { AppendOnly 
ingestMode = AppendOnly.builder() - .digestField(digestField) - .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); - return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithBaseSchemaAndDigest, ingestMode); + .digestField(digestField) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(bizDateField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .filterExistingRecords(true) + .build(); + return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario FILTER_DUPLICATES_NO_AUDITING_WITH_DATA_SPLIT() + public TestScenario WITH_AUDITING__FILTER_DUPS__MAX_VERSION__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) - .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(NoAuditing.builder().build()) - .build(); - return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); + .digestField(digestField) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(bizDateField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .filterExistingRecords(false) + .build(); + return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario FILTER_DUPLICATES_WITH_AUDITING() + // failure case + public TestScenario 
NO_AUDITING__NO_DEDUP__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) - .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) - .build(); - return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); + .digestField(digestField) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .filterExistingRecords(true) + .build(); + return new TestScenario(mainTableWithNoPrimaryKeys, stagingTableWithNoPrimaryKeys, ingestMode); } - public TestScenario FILTER_DUPLICATES_WITH_AUDITING_WITH_DATA_SPLIT() + public TestScenario WITH_AUDITING__ALLOW_DUPLICATES__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) - .deduplicationStrategy(FilterDuplicates.builder().build()) - .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) - .dataSplitField(Optional.of(dataSplitField)) - .build(); - return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); + .digestField(digestField) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .filterExistingRecords(false) + .build(); + return new TestScenario(mainTableWithNoPrimaryKeysHavingAuditField, stagingTableWithNoPrimaryKeys, ingestMode); } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java index 020a4b3d524..41bba4b65b6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java @@ -16,19 +16,19 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.BitemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromAndThruDateTime; +import 
org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import java.util.Arrays; -import java.util.Optional; public class BitemporalDeltaSourceSpecifiesFromAndThroughScenarios extends BaseTest { @@ -71,7 +71,12 @@ public TestScenario BATCH_ID_AND_TIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS() { BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchIdAndDateTime.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) @@ -87,7 +92,7 @@ public TestScenario BATCH_ID_AND_TIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS() .build()) .build()) .build(); - return new TestScenario(mainTableWithBitemporalSchemaWithBatchIdAndTime, stagingTableWithBitemporalSchemaWithDataSplit, ingestMode); + return new TestScenario(mainTableWithBitemporalSchemaWithVersionBatchIdAndTime, stagingTableWithBitemporalSchemaWithVersionWithDataSplit, ingestMode); } public TestScenario BATCH_ID_BASED__WITH_DEL_IND__NO_DATA_SPLITS() @@ -118,7 +123,12 @@ public TestScenario DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() { BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestField) - .dataSplitField(dataSplitField) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionField) + .dataSplitFieldName(dataSplitField) + 
.mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(TransactionDateTime.builder() .dateTimeInName(batchTimeInField) .dateTimeOutName(batchTimeOutField) @@ -136,7 +146,7 @@ public TestScenario DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) .build(); - return new TestScenario(mainTableWithBitemporalSchemaWithDateTime, stagingTableWithBitemporalSchemaWithDeleteIndicatorAndDataSplit, ingestMode); + return new TestScenario(mainTableWithBitemporalSchemaWithVersionBatchDateTime, stagingTableWithBitemporalSchemaWithDeleteIndicatorVersionAndDataSplit, ingestMode); } public TestScenario BATCH_ID_BASED__VALIDITY_FIELDS_SAME_NAME() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java index 7cf1d886f6c..66a8808867b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java @@ -17,19 +17,19 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.common.Datasets; import 
org.finos.legend.engine.persistence.components.ingestmode.BitemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import java.util.Arrays; -import java.util.Optional; public class BitemporalDeltaSourceSpecifiesFromOnlyScenarios extends BaseTest { @@ -86,7 +86,12 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS() { BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) @@ -102,9 +107,9 @@ public 
TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS() TestScenario testScenario = new TestScenario(ingestMode); testScenario.setDatasets(Datasets.builder() - .mainDataset(mainTableWithBitemporalFromOnlySchema) - .stagingDataset(stagingTableWithBitemporalFromOnlySchemaWithDataSplit) - .tempDataset(tempTableWithBitemporalFromOnlySchema) + .mainDataset(mainTableWithBitemporalFromOnlyWithVersionSchema) + .stagingDataset(stagingTableWithBitemporalFromOnlySchemaWithVersionWithDataSplit) + .tempDataset(tempTableWithBitemporalFromOnlyWithVersionSchema) .build()); return testScenario; } @@ -143,7 +148,12 @@ public TestScenario BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS__USING_DEFAUL { BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) @@ -160,7 +170,7 @@ public TestScenario BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS__USING_DEFAUL .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) .build(); - return new TestScenario(mainTableWithBitemporalFromOnlySchema, stagingTableWithBitemporalFromOnlySchemaWithDeleteIndWithDataSplit, ingestMode); + return new TestScenario(mainTableWithBitemporalFromOnlyWithVersionSchema, stagingTableWithBitemporalFromOnlySchemaWithDeleteIndWithVersionWithDataSplit, ingestMode); } public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__FILTER_DUPLICATES() @@ -178,7 +188,7 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__FILTER_DUPLICATE .sourceDateTimeFromField(validityFromReferenceField) .build()) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) 
.build(); TestScenario testScenario = new TestScenario(ingestMode); testScenario.setDatasets(Datasets.builder() @@ -194,7 +204,12 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS__FILTER_DUPLICA { BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) @@ -206,7 +221,7 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS__FILTER_DUPLICA .sourceDateTimeFromField(validityFromReferenceField) .build()) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); TestScenario testScenario = new TestScenario(ingestMode); @@ -214,13 +229,13 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS__FILTER_DUPLICA .database(stagingWithoutDuplicatesDbName) .name(stagingTableWithoutDuplicatesName) .alias(stagingTableWithoutDuplicatesAlias) - .schema(bitemporalFromOnlyStagingTableSchemaWithDataSplit) + .schema(bitemporalFromOnlyStagingTableSchemaWithVersionWithDataSplit) .build(); testScenario.setDatasets(Datasets.builder() - .mainDataset(mainTableWithBitemporalFromOnlySchema) - .stagingDataset(stagingTableWithBitemporalFromOnlySchemaWithDataSplit) - .tempDataset(tempTableWithBitemporalFromOnlySchema) + .mainDataset(mainTableWithBitemporalFromOnlyWithVersionSchema) + .stagingDataset(stagingTableWithBitemporalFromOnlySchemaWithVersionWithDataSplit) + .tempDataset(tempTableWithBitemporalFromOnlyWithVersionSchema) .stagingDatasetWithoutDuplicates(stagingTableWithoutDuplicates) .build()); return testScenario; @@ -245,7 +260,7 @@ public TestScenario 
BATCH_ID_BASED__WITH_DEL_IND__NO_DATA_SPLITS__FILTER_DUPLICA .deleteField(deleteIndicatorField) .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); TestScenario testScenario = new TestScenario(ingestMode); @@ -269,7 +284,12 @@ public TestScenario BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS__FILTER_DUPLI { BitemporalDelta ingestMode = BitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(false) + .build()) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) @@ -285,10 +305,10 @@ public TestScenario BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS__FILTER_DUPLI .deleteField(deleteIndicatorField) .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) - .deduplicationStrategy(FilterDuplicates.builder().build()) + .filterExistingRecords(true) .build(); - return new TestScenario(mainTableWithBitemporalFromOnlySchema, stagingTableWithBitemporalFromOnlySchemaWithDeleteIndWithDataSplit, ingestMode); + return new TestScenario(mainTableWithBitemporalFromOnlyWithVersionSchema, stagingTableWithBitemporalFromOnlySchemaWithDeleteIndWithVersionWithDataSplit, ingestMode); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java index 
4029791bbbb..5fb0be6ba7a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java @@ -20,11 +20,15 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; -import java.util.Optional; - -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningComparator; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionColumnBasedResolver; public class NonTemporalDeltaScenarios extends BaseTest { @@ -33,12 +37,12 @@ public class NonTemporalDeltaScenarios extends BaseTest Test Scenarios for Non-temporal Delta Variables: 1) Auditing: No 
Auditing, With Auditing - 2) DataSplit: Enabled, Disabled - 3) MergeStrategy: No MergeStrategy, With Delete Indicator - 4) DerivedDataset with different InterBatchDedupStrategy + 2) MergeStrategy: No MergeStrategy, With Delete Indicator + 3) Deduplication: Allow duplicates, Filter duplicates, Fail on duplicates + 4) Versioning: No Versioning, Max Versioning, All Versioning */ - public TestScenario NO_AUDTING__NO_DATASPLIT() + public TestScenario NO_AUDTING__NO_DEDUP__NO_VERSIONING() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) @@ -47,7 +51,7 @@ public TestScenario NO_AUDTING__NO_DATASPLIT() return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario NO_AUDTING__NO_DATASPLIT__WITH_DELETE_INDICATOR() + public TestScenario NO_AUDTING__WITH_DELETE_INDICATOR__NO_DEDUP__NO_VERSIONING() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) @@ -61,33 +65,47 @@ public TestScenario NO_AUDTING__NO_DATASPLIT__WITH_DELETE_INDICATOR() return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithBaseSchemaAndDigestAndDeleteIndicator, ingestMode); } - public TestScenario NO_AUDTING__WITH_DATASPLIT() + public TestScenario NO_AUDTING__NO_DEDUP__ALL_VERSION() + { + NontemporalDelta ingestMode = NontemporalDelta.builder() + .digestField(digestField) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").dataSplitFieldName(dataSplitField).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .build(); + return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithBaseSchemaAndDigest, ingestMode); + } + + public TestScenario NO_AUDTING__NO_DEDUP__ALL_VERSION_WITHOUT_PERFORM() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) 
.auditing(NoAuditing.builder().build()) - .dataSplitField(Optional.of(dataSplitField)) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").dataSplitFieldName(dataSplitField).performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); } - public TestScenario WITH_AUDTING__NO_DATASPLIT() + public TestScenario WITH_AUDTING__FILTER_DUPLICATES__NO_VERSIONING() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario WITH_AUDTING__WITH_DATASPLIT() + public TestScenario WITH_AUDTING__FAIL_ON_DUPS__ALL_VERSION() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) - .dataSplitField(Optional.of(dataSplitField)) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).dataSplitFieldName(dataSplitField).build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) .build(); - return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); + return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchemaAndDigest, ingestMode); } public TestScenario NO_VERSIONING__WITH_STAGING_FILTER() @@ -99,58 +117,61 @@ public TestScenario NO_VERSIONING__WITH_STAGING_FILTER() return new 
TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithFilters, ingestMode); } - public TestScenario MAX_VERSIONING_WITH_GREATER_THAN__DEDUP__WITH_STAGING_FILTER() + public TestScenario FILTER_DUPS__MAX_VERSION__WITH_STAGING_FILTER() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) .auditing(NoAuditing.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(version.name()) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(true) + .mergeDataVersionResolver(VersionColumnBasedResolver.builder().versionComparator(VersionComparator.GREATER_THAN).build()) + .performStageVersioning(true) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithVersion, stagingTableWithVersionAndSnapshotId, ingestMode); } - public TestScenario MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITH_STAGING_FILTER() + public TestScenario NO_DEDUP__MAX_VERSION_WITHOUT_PERFORM__WITH_STAGING_FILTER() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) .auditing(NoAuditing.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(version.name()) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(false) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(false) .build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithVersion, stagingTableWithVersionAndSnapshotId, ingestMode); } - public TestScenario MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITHOUT_STAGING_FILTER() + public TestScenario NO_DEDUP__MAX_VERSION_WITHOUT_PERFORM() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) .auditing(NoAuditing.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() 
.versioningField(version.name()) - .versioningComparator(VersioningComparator.GREATER_THAN) - .performDeduplication(false) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(false) .build()) .build(); return new TestScenario(mainTableWithVersion, stagingTableWithVersion, ingestMode); } - public TestScenario MAX_VERSIONING_WITH_GREATER_THAN_EQUAL__DEDUP__WITHOUT_STAGING_FILTER() + public TestScenario NO_DEDUP__MAX_VERSION() { NontemporalDelta ingestMode = NontemporalDelta.builder() .digestField(digestField) .auditing(NoAuditing.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(version.name()) - .versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO) - .performDeduplication(true) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)) + .performStageVersioning(true) .build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithVersion, stagingTableWithVersion, ingestMode); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NontemporalSnapshotTestScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NontemporalSnapshotTestScenarios.java index 04a9bcb372a..fb5a61bff51 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NontemporalSnapshotTestScenarios.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NontemporalSnapshotTestScenarios.java @@ -18,6 +18,10 @@ import org.finos.legend.engine.persistence.components.ingestmode.NontemporalSnapshot; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; public class NontemporalSnapshotTestScenarios extends BaseTest { @@ -26,41 +30,41 @@ public class NontemporalSnapshotTestScenarios extends BaseTest Test Scenarios of Non-temporal Snapshot Variables: 1) Auditing: No Auditing, With Auditing - 2) DataSplit: Enabled, Disabled + 2) Deduplication: Allow duplicates, Filter duplicates, Fail on duplicates + 3) Versioning: No Versioning, Max Versioning + + Valid Scenarios: + 1. No Auditing , Allow Dups , No Versioining + 2. With Auditing, Filter Dups, No Versioining + 3. With Auditing, Fail on duplicates, Max version + + Invalid Scenario: + 1. 
All Versioning */ - public TestScenario NO_AUDTING__NO_DATASPLIT() + public TestScenario NO_AUDTING__NO_DEDUP__NO_VERSIONING() { NontemporalSnapshot ingestMode = NontemporalSnapshot.builder().auditing(NoAuditing.builder().build()).build(); return new TestScenario(mainTableWithBaseSchema, stagingTableWithBaseSchema, ingestMode); } - public TestScenario NO_AUDTING__WITH_DATASPLIT() - { - NontemporalSnapshot ingestMode = NontemporalSnapshot.builder() - .auditing(NoAuditing.builder().build()) - .dataSplitField(dataSplitField) - .build(); - return new TestScenario(mainTableWithBaseSchema, stagingTableWithBaseSchemaHavingDataSplit, ingestMode); - } - - public TestScenario WITH_AUDTING__NO_DATASPLIT() + public TestScenario WITH_AUDTING__FILTER_DUPLICATES__NO_VERSIONING() { NontemporalSnapshot ingestMode = NontemporalSnapshot.builder() .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithBaseSchemaHavingAuditField, stagingTableWithBaseSchema, ingestMode); } - public TestScenario WITH_AUDTING__WITH_DATASPLIT() + public TestScenario WITH_AUDTING__FAIL_ON_DUP__MAX_VERSION() { NontemporalSnapshot ingestMode = NontemporalSnapshot.builder() .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) - .dataSplitField("data_split") + .versioningStrategy(MaxVersionStrategy.builder().versioningField(bizDateField).build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) .build(); - return new TestScenario(mainTableWithBaseSchemaHavingAuditField, stagingTableWithBaseSchemaHavingDataSplit, ingestMode); + return new TestScenario(mainTableWithBaseSchemaHavingAuditField, stagingTableWithBaseSchema, ingestMode); } - - - } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/TestScenario.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/TestScenario.java index 1891b57d339..b8dbd83de98 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/TestScenario.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/TestScenario.java @@ -65,6 +65,6 @@ public void setDatasets(Datasets datasets) public void setMainTable(Dataset dataset) { - this.mainTable = mainTable; + this.mainTable = dataset; } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java index e21e0869862..49b3f90aa94 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java @@ -17,34 +17,32 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.common.OptimizationFilter; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.NoVersioningStrategy; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.VersioningComparator; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionColumnBasedResolver; import java.util.Arrays; -import java.util.Optional; public class UnitemporalDeltaBatchIdBasedScenarios extends BaseTest { - /* Test Scenarios for Non-temporal Delta Variables: 1) transactionMilestoning = BatchId 2) deleteIndicator : 
Enabled, Disabled - 3) DataSplit: Enabled, Disabled - - Valid Combinations: - 1) No Delete Ind, No Data Splits - 2) No Delete Ind, With Data Splits - 3) With Delete Ind, No Data Splits - 4) With Delete Ind, With Data Splits - 5) No Delete Ind, No Data Splits, With Filter Predicates + 3) Deduplication: Allow duplicates, Filter duplicates, Fail on duplicates + 4) Versioning: No Versioning, Max Versioning, All Versioning */ - public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS() + + public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -56,21 +54,20 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS() return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS() + public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__ALL_VERSION_WITHOUT_PERFORM() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) .build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").dataSplitFieldName(dataSplitField).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(false).build()) .build(); - return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); } - public TestScenario BATCH_ID_BASED__WITH_DEL_IND__NO_DATA_SPLITS() + public TestScenario BATCH_ID_BASED__WITH_DEL_IND__FILTER_DUPS__NO_VERSIONING() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -82,15 +79,15 @@ public TestScenario BATCH_ID_BASED__WITH_DEL_IND__NO_DATA_SPLITS() .deleteField(deleteIndicatorField) 
.addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithDeleteIndicator, ingestMode); } - public TestScenario BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() + public TestScenario BATCH_ID_BASED__WITH_DEL_IND__NO_DEDUP__ALL_VERSION() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) .transactionMilestoning(BatchId.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) @@ -99,12 +96,13 @@ public TestScenario BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() .deleteField(deleteIndicatorField) .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).dataSplitFieldName(dataSplitField).build()) .build(); - return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithDeleteIndicatorWithDataSplit, ingestMode); + return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithDeleteIndicator, ingestMode); } - public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATION_FILTERS() + public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_OPTIMIZATION_FILTERS() { OptimizationFilter filter = OptimizationFilter.of("id", "{ID_LOWER_BOUND}", "{ID_UPPER_BOUND}"); UnitemporalDelta ingestMode = UnitemporalDelta.builder() @@ -118,7 +116,7 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATIO return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATION_FILTERS__INCLUDES_NULL_VALUES() + public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_OPTIMIZATION_FILTERS__INCLUDES_NULL_VALUES() { 
OptimizationFilter filter = OptimizationFilter.of("id", "{ID_LOWER_BOUND}", "{ID_UPPER_BOUND}").withIncludesNullValues(true); UnitemporalDelta ingestMode = UnitemporalDelta.builder() @@ -132,7 +130,7 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATIO return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_MISSING_OPTIMIZATION_FILTER() + public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_MISSING_OPTIMIZATION_FILTER() { OptimizationFilter filter = OptimizationFilter.of("unknown_column", "{ID_LOWER_BOUND}", "{ID_UPPER_BOUND}"); UnitemporalDelta ingestMode = UnitemporalDelta.builder() @@ -146,7 +144,7 @@ public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_MISSING_OPT return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATION_FILTER_UNSUPPORTED_DATATYPE() + public TestScenario BATCH_ID_BASED__NO_DEL_IND__WITH_OPTIMIZATION_FILTER_UNSUPPORTED_DATATYPE() { OptimizationFilter filter = OptimizationFilter.of("name", "{NAME_LOWER_BOUND}", "{NAME_UPPER_BOUND}"); UnitemporalDelta ingestMode = UnitemporalDelta.builder() @@ -173,7 +171,7 @@ public TestScenario BATCH_ID_BASED__NO_VERSIONING__WITH_STAGING_FILTER() return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithFilter, ingestMode); } - public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__DEDUP__WITH_STAGING_FILTER() + public TestScenario BATCH_ID_BASED__FILTER_DUPS__MAX_VERSION__WITH_STAGING_FILTER() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -181,12 +179,13 @@ public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__DEDUP__WIT .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) .build()) - 
.versioningStrategy(MaxVersionStrategy.builder().performDeduplication(true).versioningField(version.name()).versioningComparator(VersioningComparator.GREATER_THAN).build()) + .versioningStrategy(MaxVersionStrategy.builder().performStageVersioning(true).versioningField(version.name()).mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)).build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithBatchIdAndVersionBasedSchema, stagingTableWithFilterAndVersion, ingestMode); } - public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITH_STAGING_FILTER() + public TestScenario BATCH_ID_BASED__NO_DEDUP__MAX_VERSION_WITHOUT_PERFORM__WITH_STAGING_FILTER() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -194,12 +193,12 @@ public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__ .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) .build()) - .versioningStrategy(MaxVersionStrategy.builder().performDeduplication(false).versioningField(version.name()).versioningComparator(VersioningComparator.GREATER_THAN).build()) + .versioningStrategy(MaxVersionStrategy.builder().performStageVersioning(false).versioningField(version.name()).mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)).build()) .build(); return new TestScenario(mainTableWithBatchIdAndVersionBasedSchema, stagingTableWithFilterAndVersion, ingestMode); } - public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITHOUT_STAGING_FILTER() + public TestScenario BATCH_ID_BASED__FAIL_ON_DUPS__MAX_VERSIONING_WITHOUT_PERFORM__NO_STAGING_FILTER() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -207,12 +206,13 @@ public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__ .batchIdInName(batchIdInField) 
.batchIdOutName(batchIdOutField) .build()) - .versioningStrategy(MaxVersionStrategy.builder().performDeduplication(false).versioningField(version.name()).versioningComparator(VersioningComparator.GREATER_THAN).build()) + .versioningStrategy(MaxVersionStrategy.builder().performStageVersioning(false).versioningField(version.name()).mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)).build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) .build(); return new TestScenario(mainTableWithBatchIdAndVersionBasedSchema, stagingTableWithVersion, ingestMode); } - public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN_EQUAL__DEDUP__WITHOUT_STAGING_FILTER() + public TestScenario BATCH_ID_BASED__NO_DEDUP__MAX_VERSIONING__NO_STAGING_FILTER() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -220,7 +220,7 @@ public TestScenario BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN_EQUAL__DEDU .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) .build()) - .versioningStrategy(MaxVersionStrategy.builder().performDeduplication(true).versioningField(version.name()).versioningComparator(VersioningComparator.GREATER_THAN_EQUAL_TO).build()) + .versioningStrategy(MaxVersionStrategy.builder().performStageVersioning(true).versioningField(version.name()).mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN_EQUAL_TO)).build()) .build(); return new TestScenario(mainTableWithBatchIdAndVersionBasedSchema, stagingTableWithVersion, ingestMode); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdDateTimeBasedScenarios.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdDateTimeBasedScenarios.java index 5a1f593df3a..283aa64a426 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdDateTimeBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdDateTimeBasedScenarios.java @@ -16,11 +16,14 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import java.util.Arrays; -import java.util.Optional; public class UnitemporalDeltaBatchIdDateTimeBasedScenarios extends BaseTest { @@ -30,16 +33,11 @@ public class UnitemporalDeltaBatchIdDateTimeBasedScenarios extends BaseTest Variables: 1) transactionMilestoning = BatchIdAndDateTime 2) deleteIndicator : Enabled, Disabled - 3) DataSplit: Enabled, Disabled - - Valid Combinations: - 1) No Delete Ind, No Data Splits - 2) No Delete Ind, With 
Data Splits - 3) With Delete Ind, No Data Splits - 4) With Delete Ind, With Data Splits + 3) Deduplication: Allow duplicates, Filter duplicates, Fail on duplicates + 4) Versioning: No Versioning, Max Versioning, All Versioning */ - public TestScenario BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -53,23 +51,24 @@ public TestScenario BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS() return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_AND_TIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__NO_DEL_IND__FILTER_DUPS__ALL_VERSION_WITHOUT_PERFORM() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) .transactionMilestoning(BatchIdAndDateTime.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) .dateTimeInName(batchTimeInField) .dateTimeOutName(batchTimeOutField) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).dataSplitFieldName(dataSplitField).performStageVersioning(false).build()) .build(); return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); } - public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND_MULTI_VALUES__NO_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND_MULTI_VALUES__NO_DEDUP_NO_VERSION() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -88,7 +87,7 @@ public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND_MULTI_VALUES__NO_DATA_ return new 
TestScenario(mainTableWithBatchIdAndTime, stagingTableWithDeleteIndicator, ingestMode); } - public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__NO_DEDUP__NO_VERSION() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -107,11 +106,10 @@ public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS() return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBooleanDeleteIndicator, ingestMode); } - public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__FAIL_ON_DUP__ALL_VERSION() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) .transactionMilestoning(BatchIdAndDateTime.builder() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) @@ -122,9 +120,11 @@ public TestScenario BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() .deleteField(deleteIndicatorField) .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).dataSplitFieldName(dataSplitField).performStageVersioning(true).build()) .build(); - return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithDeleteIndicatorWithDataSplit, ingestMode); + return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithDeleteIndicator, ingestMode); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaDateTimeBasedScenarios.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaDateTimeBasedScenarios.java index af8e8fb7b33..35f2028680c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaDateTimeBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaDateTimeBasedScenarios.java @@ -16,11 +16,14 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import java.util.Arrays; -import java.util.Optional; public class UnitemporalDeltaDateTimeBasedScenarios extends BaseTest { @@ -30,16 +33,11 @@ public class UnitemporalDeltaDateTimeBasedScenarios extends BaseTest Variables: 1) transactionMilestoning = DateTime 2) deleteIndicator : Enabled, Disabled - 3) DataSplit: Enabled, Disabled - - Valid Combinations: - 1) No Delete Ind, No Data Splits - 2) No Delete Ind, With Data Splits - 3) With Delete Ind, No Data 
Splits - 4) With Delete Ind, With Data Splits + 3) Deduplication: Allow duplicates, Filter duplicates, Fail on duplicates + 4) Versioning: No Versioning, Max Versioning, All Versioning */ - public TestScenario DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS() + public TestScenario DATETIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSIONING() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -51,21 +49,22 @@ public TestScenario DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS() return new TestScenario(mainTableWithDateTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario DATETIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS() + public TestScenario DATETIME_BASED__NO_DEL_IND__FAIL_ON_DUPS__ALL_VERSION_WITHOUT_PERFORM() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) .transactionMilestoning(TransactionDateTime.builder() .dateTimeInName(batchTimeInField) .dateTimeOutName(batchTimeOutField) .build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).dataSplitFieldName(dataSplitField).performStageVersioning(false).build()) .build(); return new TestScenario(mainTableWithDateTime, stagingTableWithBaseSchemaHavingDigestAndDataSplit, ingestMode); } - public TestScenario DATETIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS() + public TestScenario DATETIME_BASED__WITH_DEL_IND__NO_DEDUP__NO_VERSION() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) @@ -81,11 +80,10 @@ public TestScenario DATETIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS() return new TestScenario(mainTableWithDateTime, stagingTableWithDeleteIndicator, ingestMode); } - public TestScenario DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() + public TestScenario DATETIME_BASED__WITH_DEL_IND__FILTER_DUPS__ALL_VERSION() { 
UnitemporalDelta ingestMode = UnitemporalDelta.builder() .digestField(digestField) - .dataSplitField(Optional.of(dataSplitField)) .transactionMilestoning(TransactionDateTime.builder() .dateTimeInName(batchTimeInField) .dateTimeOutName(batchTimeOutField) @@ -94,8 +92,10 @@ public TestScenario DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() .deleteField(deleteIndicatorField) .addAllDeleteValues(Arrays.asList(deleteIndicatorValues)) .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).dataSplitFieldName(dataSplitField).performStageVersioning(true).build()) .build(); - return new TestScenario(mainTableWithDateTime, stagingTableWithDeleteIndicatorWithDataSplit, ingestMode); + return new TestScenario(mainTableWithDateTime, stagingTableWithDeleteIndicator, ingestMode); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java index 5162e437250..7f0991b20d4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java @@ -16,6 +16,7 @@ import 
org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; @@ -29,18 +30,18 @@ public class UnitemporalSnapshotBatchIdBasedScenarios extends BaseTest Variables: 1) transactionMilestoning = BatchId 2) partition : Enabled, Disabled - 3) DataSplit: Enabled, Disabled - 4) partitionValuesByField: Enabled, Disabled + 3) partitionValuesByField: Enabled, Disabled + 4) Versioning: NoVersioning, MaxVersioning + 5) Deduplication: AllowDups, FailOnDups, FilterDups Valid Combinations: - 1) Without Partition, No Data Splits - 2) Without Partition, With Data Splits -> TBD - 3) With Partition, No Data Splits - 4) With Partition, With Data Splits -> TBD - 5) Without Partition, No Data Splits, Partition Filter + 1) Without Partition, No Dedup No Versioning + 2) Without Partition, FailOnDups No Versioning + 3) With Partition, No Dedup No Versioning + 4) With Partition Filter, No Dedup No Versioning */ - public TestScenario BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS() + public TestScenario BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) @@ -53,12 +54,21 @@ public TestScenario BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS() return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_BASED__WITHOUT_PARTITIONS__WITH_DATA_SPLITS() + public TestScenario BATCH_ID_BASED__WITHOUT_PARTITIONS__FAIL_ON_DUPS__NO_VERSION() { - return null; + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + 
.transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .emptyDatasetHandling(NoOp.builder().build()) + .build(); + return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_BASED__WITH_PARTITIONS__NO_DATA_SPLITS() + public TestScenario BATCH_ID_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) @@ -71,13 +81,7 @@ public TestScenario BATCH_ID_BASED__WITH_PARTITIONS__NO_DATA_SPLITS() return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_BASED__WITH_PARTITIONS__WITH_DATA_SPLITS() - { - return null; - } - - - public TestScenario BATCH_ID_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS() + public TestScenario BATCH_ID_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java index 57419aa4c90..dd7359eb530 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java @@ -16,8 +16,12 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.DeleteTargetData; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import java.util.Arrays; @@ -29,18 +33,19 @@ public class UnitemporalSnapshotBatchIdDateTimeBasedScenarios extends BaseTest Variables: 1) transactionMilestoning = BatchIdAndDateTimeBased 2) partition : Enabled, Disabled - 3) DataSplit: Enabled, Disabled - 4) partitionValuesByField: Enabled, Disabled + 3) partitionValuesByField: Enabled, Disabled + 4) Versioning: NoVersioning, MaxVersioning + 5) Deduplication: AllowDups, FailOnDups, FilterDups Valid Combinations: - 1) Without Partition, No Data Splits - 2) Without Partition, With Data Splits -> TBD - 3) With Partition, No Data Splits - 4) With Partition, With Data Splits -> TBD - 5) Without Partition, No Data Splits, Partition Filter + 1) Without Partition, No Dedup No Versioning + 2) Without Partition, No Dedup MaxVersioning + 3) Without Partition, Filter Dups MaxVersioning + 4) With Partition, No Dedup No Versioning + 5) With Partition Filter, No Dedup No Versioning */ - public TestScenario
BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) @@ -55,12 +60,24 @@ public TestScenario BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS( return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__WITH_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__MAX_VERSION() { - return null; + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .dateTimeInName(batchTimeInField) + .dateTimeOutName(batchTimeOutField) + .build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .emptyDatasetHandling(DeleteTargetData.builder().build()) + .build(); + return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__FILTER_DUPS__MAX_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) @@ -70,18 +87,29 @@ public TestScenario BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS() .dateTimeInName(batchTimeInField) .dateTimeOutName(batchTimeOutField) .build()) - .addAllPartitionFields(Arrays.asList(partitionKeys)) + .deduplicationStrategy(FilterDuplicates.builder().build()) + 
.versioningStrategy(MaxVersionStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .emptyDatasetHandling(DeleteTargetData.builder().build()) .build(); return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__WITH_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION() { - return null; + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .dateTimeInName(batchTimeInField) + .dateTimeOutName(batchTimeOutField) + .build()) + .addAllPartitionFields(Arrays.asList(partitionKeys)) + .build(); + return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - - public TestScenario BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS() + public TestScenario BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotDateTimeBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotDateTimeBasedScenarios.java index 08d653bb416..87b6b09ed74 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotDateTimeBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotDateTimeBasedScenarios.java @@ -16,7 +16,10 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import java.util.Arrays; @@ -28,18 +31,18 @@ public class UnitemporalSnapshotDateTimeBasedScenarios extends BaseTest Variables: 1) transactionMilestoning = Datetime based 2) partition : Enabled, Disabled - 3) DataSplit: Enabled, Disabled - 4) partitionValuesByField: Enabled, Disabled + 3) partitionValuesByField: Enabled, Disabled + 4) Versioning: NoVersioning, MaxVersioning + 5) Deduplication: AllowDups, FailOnDups, FilterDups Valid Combinations: - 1) Without Partition, No Data Splits - 2) Without Partition, With Data Splits -> TBD - 3) With Partition, No Data Splits - 4) With Partition, With Data Splits -> TBD - 5) Without Partition, No Data Splits, Partition Filter + 1) Without Partition, No Dedup No Versioning + 2) Without Partition, FailOnDups MaxVersioning + 3) With Partition, No Dedup No Versioning + 4) With Partition Filter, No Dedup No Versioning */ - public TestScenario 
DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS() + public TestScenario DATETIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) @@ -51,12 +54,21 @@ public TestScenario DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS() return new TestScenario(mainTableWithDateTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario DATETIME_BASED__WITHOUT_PARTITIONS__WITH_DATA_SPLITS() + public TestScenario DATETIME_BASED__WITHOUT_PARTITIONS__FAIL_ON_DUP__MAX_VERSION() { - return null; + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(TransactionDateTime.builder() + .dateTimeInName(batchTimeInField) + .dateTimeOutName(batchTimeOutField) + .build()) + .versioningStrategy(MaxVersionStrategy.builder().versioningField("biz_date").mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .build(); + return new TestScenario(mainTableWithDateTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario DATETIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS() + public TestScenario DATETIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) @@ -69,13 +81,7 @@ public TestScenario DATETIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS() return new TestScenario(mainTableWithDateTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } - public TestScenario DATETIME_BASED__WITH_PARTITIONS__WITH_DATA_SPLITS() - { - return null; - } - - - public TestScenario DATETIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS() + public TestScenario DATETIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION() { UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java index a76656c6870..a3cf0a5f3e3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java @@ -37,9 +37,9 @@ public abstract class AppendOnlyTestCases extends BaseTest AppendOnlyScenarios scenarios = new AppendOnlyScenarios(); @Test - void testAppendOnlyAllowDuplicatesNoAuditing() + void testAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecords() { - TestScenario scenario = scenarios.ALLOW_DUPLICATES_NO_AUDITING(); + TestScenario scenario = scenarios.NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -49,13 +49,13 @@ void testAppendOnlyAllowDuplicatesNoAuditing() .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyAppendOnlyAllowDuplicatesNoAuditing(operations); + verifyAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecordsDeriveMainSchema(operations); } @Test - void 
testAppendOnlyAllowDuplicatesNoAuditingDeriveMainSchema() + void testAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecordsDeriveMainSchema() { - TestScenario scenario = scenarios.ALLOW_DUPLICATES_NO_AUDITING_DERIVE_MAIN_SCHEMA(); + TestScenario scenario = scenarios.NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS__DERIVE_MAIN_SCHEMA(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -65,31 +65,15 @@ void testAppendOnlyAllowDuplicatesNoAuditingDeriveMainSchema() .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyAppendOnlyAllowDuplicatesNoAuditing(operations); + verifyAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecordsDeriveMainSchema(operations); } - public abstract void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations); + public abstract void verifyAppendOnlyNoAuditingNoDedupNoVersioningNoFilterExistingRecordsDeriveMainSchema(GeneratorResult operations); @Test - void testAppendOnlyAllowDuplicatesWithAuditing() + void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords() { - TestScenario scenario = scenarios.ALLOW_DUPLICATES_WITH_AUDITING(); - RelationalGenerator generator = RelationalGenerator.builder() - .ingestMode(scenario.getIngestMode()) - .relationalSink(getRelationalSink()) - .collectStatistics(true) - .executionTimestampClock(fixedClock_2000_01_01) - .build(); - GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyAppendOnlyAllowDuplicatesWithAuditing(operations); - } - - public abstract void verifyAppendOnlyAllowDuplicatesWithAuditing(GeneratorResult operations); - - @Test - void testAppendOnlyAllowDuplicatesWithAuditingWithDataSplits() - { - TestScenario scenario = scenarios.ALLOW_DUPLICATES_WITH_AUDITING__WITH_DATASPLIT(); + TestScenario scenario = 
scenarios.WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -97,64 +81,15 @@ void testAppendOnlyAllowDuplicatesWithAuditingWithDataSplits() .executionTimestampClock(fixedClock_2000_01_01) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(operations, dataSplitRangesOneToTwo); - } - - public abstract void verifyAppendOnlyAllowDuplicatesWithAuditingWithDataSplits(List generatorResults, List dataSplitRanges); - - @Test - void testAppendOnlyFailOnDuplicatesNoAuditing() - { - TestScenario scenario = scenarios.FAIL_ON_DUPLICATES_NO_AUDITING(); - RelationalGenerator generator = RelationalGenerator.builder() - .ingestMode(scenario.getIngestMode()) - .relationalSink(getRelationalSink()) - .collectStatistics(true) - .build(); - - GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyAppendOnlyFailOnDuplicatesNoAuditing(operations); + verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords(operations, dataSplitRangesOneToTwo); } - public abstract void verifyAppendOnlyFailOnDuplicatesNoAuditing(GeneratorResult operations); + public abstract void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecords(List generatorResults, List dataSplitRanges); @Test - void testAppendOnlyFailOnDuplicatesWithAuditing() + void testAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRecords() { - TestScenario scenario = scenarios.FAIL_ON_DUPLICATES_WITH_AUDITING(); - RelationalGenerator generator = RelationalGenerator.builder() - .ingestMode(scenario.getIngestMode()) - .relationalSink(getRelationalSink()) - .executionTimestampClock(fixedClock_2000_01_01) - .collectStatistics(true) - .build(); - - GeneratorResult 
operations = generator.generateOperations(scenario.getDatasets()); - verifyAppendOnlyFailOnDuplicatesWithAuditing(operations); - } - - public abstract void verifyAppendOnlyFailOnDuplicatesWithAuditing(GeneratorResult operations); - - @Test - void testAppendOnlyFilterDuplicatesNoAuditing() - { - TestScenario scenario = scenarios.FILTER_DUPLICATES_NO_AUDITING(); - RelationalGenerator generator = RelationalGenerator.builder() - .ingestMode(scenario.getIngestMode()) - .relationalSink(getRelationalSink()) - .collectStatistics(true) - .build(); - - GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyAppendOnlyFilterDuplicatesNoAuditing(operations); - } - - public abstract void verifyAppendOnlyFilterDuplicatesNoAuditing(GeneratorResult operations); - - @Test - void testAppendOnlyFilterDuplicatesWithAuditing() - { - TestScenario scenario = scenarios.FILTER_DUPLICATES_WITH_AUDITING(); + TestScenario scenario = scenarios.WITH_AUDITING__FILTER_DUPS__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -164,15 +99,15 @@ void testAppendOnlyFilterDuplicatesWithAuditing() .build(); GeneratorResult queries = generator.generateOperations(scenario.getDatasets()); - verifyAppendOnlyFilterDuplicatesWithAuditing(queries); + verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRecords(queries); } - public abstract void verifyAppendOnlyFilterDuplicatesWithAuditing(GeneratorResult queries); + public abstract void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterExistingRecords(GeneratorResult queries); @Test - void testAppendOnlyFilterDuplicatesNoAuditingWithDataSplit() + void testAppendOnlyNoAuditingValidation() { - TestScenario scenario = scenarios.FILTER_DUPLICATES_NO_AUDITING_WITH_DATA_SPLIT(); + TestScenario scenario = 
scenarios.NO_AUDITING__FILTER_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -184,14 +119,14 @@ void testAppendOnlyFilterDuplicatesNoAuditingWithDataSplit() } catch (Exception e) { - Assertions.assertEquals("DataSplits not supported for NoAuditing mode", e.getMessage()); + Assertions.assertEquals("NoAuditing not allowed when there are primary keys", e.getMessage()); } } @Test - public void testAppendOnlyFilterDuplicatesWithAuditingWithDataSplit() + public void testAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExistingRecords() { - TestScenario scenario = scenarios.FILTER_DUPLICATES_WITH_AUDITING_WITH_DATA_SPLIT(); + TestScenario scenario = scenarios.WITH_AUDITING__FILTER_DUPS__ALL_VERSION__WITH_FILTER_EXISTING_RECORDS(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -200,19 +135,20 @@ public void testAppendOnlyFilterDuplicatesWithAuditingWithDataSplit() .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyAppendOnlyFilterDuplicatesWithAuditingWithDataSplit(operations, dataSplitRangesOneToTwo); + verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExistingRecords(operations, dataSplitRangesOneToTwo); } - public abstract void verifyAppendOnlyFilterDuplicatesWithAuditingWithDataSplit(List operations, List dataSplitRanges); + public abstract void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExistingRecords(List operations, List dataSplitRanges); @Test void testAppendOnlyWithUpperCaseOptimizer() { - TestScenario scenario = scenarios.FILTER_DUPLICATES_NO_AUDITING(); + TestScenario scenario = scenarios.WITH_AUDITING__FILTER_DUPS__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS(); RelationalGenerator generator = 
RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) .caseConversion(CaseConversion.TO_UPPER) + .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); @@ -224,13 +160,14 @@ void testAppendOnlyWithUpperCaseOptimizer() @Test void testAppendOnlyWithLessColumnsInStaging() { - TestScenario scenario = scenarios.FILTER_DUPLICATES_NO_AUDITING(); + TestScenario scenario = scenarios.WITH_AUDITING__FILTER_DUPS__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS(); Dataset stagingTable = scenario.getStagingTable().withSchema(stagingTableSchemaWithLimitedColumns); Datasets datasets = Datasets.of(scenario.getMainTable(), stagingTable); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -240,28 +177,39 @@ void testAppendOnlyWithLessColumnsInStaging() public abstract void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations); @Test - void testAppendOnlyValidationPkFieldsMissing() + void testAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecords() { - TestScenario testScenario = scenarios.FILTER_DUPLICATES_NO_AUDITING(); - // Staging table has no pks - Dataset stagingTable = testScenario.getStagingTable().withSchema(baseTableSchemaWithNoPrimaryKeys); - Datasets datasets = Datasets.of(testScenario.getMainTable(), stagingTable); - try - { - RelationalGenerator generator = RelationalGenerator.builder() - .ingestMode(testScenario.getIngestMode()) - .relationalSink(getRelationalSink()) - .executionTimestampClock(fixedClock_2000_01_01) - .build(); - GeneratorResult queries = generator.generateOperations(datasets); - Assertions.fail("Exception was not thrown"); - } - catch (Exception e) - { - 
Assertions.assertEquals("Primary key list must not be empty", e.getMessage()); - } + TestScenario scenario = scenarios.WITH_AUDITING__FAIL_ON_DUPS__MAX_VERSION__WITH_FILTER_EXISTING_RECORDS(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecords(operations); } + public abstract void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExistingRecords(GeneratorResult operations); + + @Test + void testAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords() + { + TestScenario scenario = scenarios.WITH_AUDITING__FILTER_DUPS__MAX_VERSION__NO_FILTER_EXISTING_RECORDS(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords(operations); + } + + public abstract void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingRecords(GeneratorResult operations); + @Test void testAppendOnlyValidationDateTimeFieldMissing() { @@ -280,5 +228,24 @@ void testAppendOnlyValidationDateTimeFieldMissing() } } + @Test + void testAppendOnlyNoAuditingFilterExistingRecords() + { + TestScenario scenario = scenarios.NO_AUDITING__NO_DEDUP__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .build(); + try + { + List operations 
= generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); + } + catch (Exception e) + { + Assertions.assertEquals("Primary keys and digest are mandatory for filterExistingRecords", e.getMessage()); + } + } + public abstract RelationalSink getRelationalSink(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java index 87a69bf5da6..87ad65d7f20 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java @@ -36,9 +36,9 @@ public abstract class NontemporalDeltaTestCases extends BaseTest NonTemporalDeltaScenarios scenarios = new NonTemporalDeltaScenarios(); @Test - void testNontemporalDeltaNoAuditingNoDataSplit() + void testNontemporalDeltaNoAuditingNoDedupNoVersioning() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -49,15 +49,15 @@ void testNontemporalDeltaNoAuditingNoDataSplit() .build(); 
GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalDeltaNoAuditingNoDataSplit(operations); + verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(operations); } - public abstract void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operations); + public abstract void verifyNontemporalDeltaNoAuditingNoDedupNoVersioning(GeneratorResult operations); @Test - void testNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator() + void testNontemporalDeltaNoAuditingWithDeleteIndicatorNoDedupNoVersioning() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT__WITH_DELETE_INDICATOR(); + TestScenario testScenario = scenarios.NO_AUDTING__WITH_DELETE_INDICATOR__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -65,15 +65,15 @@ void testNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator() .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator(operations); + verifyNontemporalDeltaNoAuditingWithDeleteIndicatorNoDedupNoVersioning(operations); } - public abstract void verifyNontemporalDeltaNoAuditingNoDataSplitWithDeleteIndicator(GeneratorResult operations); + public abstract void verifyNontemporalDeltaNoAuditingWithDeleteIndicatorNoDedupNoVersioning(GeneratorResult operations); @Test - void testNontemporalDeltaWithAuditingNoDataSplit() + void testNontemporalDeltaWithAuditingFilterDupsNoVersioning() { - TestScenario testScenario = scenarios.WITH_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.WITH_AUDTING__FILTER_DUPLICATES__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -82,15 +82,15 @@ void testNontemporalDeltaWithAuditingNoDataSplit() 
.build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalDeltaWithAuditingNoDataSplit(operations); + verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(operations); } - public abstract void verifyNontemporalDeltaWithAuditingNoDataSplit(GeneratorResult operations); + public abstract void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorResult operations); @Test - void testNonTemporalDeltaNoAuditingWithDataSplit() + void testNonTemporalDeltaNoAuditingNoDedupAllVersion() { - TestScenario testScenario = scenarios.NO_AUDTING__WITH_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__ALL_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -98,15 +98,31 @@ void testNonTemporalDeltaNoAuditingWithDataSplit() .build(); List operations = generator.generateOperationsWithDataSplits(testScenario.getDatasets(), dataSplitRangesOneToTwo); - verifyNonTemporalDeltaNoAuditingWithDataSplit(operations, dataSplitRangesOneToTwo); + verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(operations, dataSplitRangesOneToTwo); } - public abstract void verifyNonTemporalDeltaNoAuditingWithDataSplit(List operations, List dataSplitRanges); + public abstract void verifyNonTemporalDeltaNoAuditingNoDedupAllVersion(List operations, List dataSplitRanges); @Test - void testNonTemporalDeltaWithWithAuditingWithDataSplit() + void testNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform() { - TestScenario testScenario = scenarios.WITH_AUDTING__WITH_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__ALL_VERSION_WITHOUT_PERFORM(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(testScenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .build(); + + List operations = 
generator.generateOperationsWithDataSplits(testScenario.getDatasets(), dataSplitRangesOneToTwo); + verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(operations, dataSplitRangesOneToTwo); + } + + public abstract void verifyNonTemporalDeltaNoAuditingNoDedupAllVersionWithoutPerform(List operations, List dataSplitRanges); + + @Test + void testNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion() + { + TestScenario testScenario = scenarios.WITH_AUDTING__FAIL_ON_DUPS__ALL_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -115,15 +131,15 @@ void testNonTemporalDeltaWithWithAuditingWithDataSplit() .build(); List operations = generator.generateOperationsWithDataSplits(testScenario.getDatasets(), dataSplitRangesOneToTwo); - verifyNonTemporalDeltaWithWithAuditingWithDataSplit(operations, dataSplitRangesOneToTwo); + verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(operations, dataSplitRangesOneToTwo); } - public abstract void verifyNonTemporalDeltaWithWithAuditingWithDataSplit(List operations, List dataSplitRanges); + public abstract void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List operations, List dataSplitRanges); @Test void testNontemporalDeltaWithUpperCaseOptimizer() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -139,7 +155,7 @@ void testNontemporalDeltaWithUpperCaseOptimizer() @Test void testNontemporalDeltaWithLessColumnsInStaging() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); Dataset stagingTable = testScenario.getStagingTable().withSchema(stagingTableSchemaWithLimitedColumns); 
Datasets datasets = Datasets.of(testScenario.getMainTable(), stagingTable); @@ -157,7 +173,7 @@ void testNontemporalDeltaWithLessColumnsInStaging() @Test void testNontemporalDeltaValidationPkFieldsMissing() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); // Staging table has no pks Dataset stagingTable = testScenario.getStagingTable().withSchema(baseTableSchemaWithNoPrimaryKeys); Datasets datasets = Datasets.of(testScenario.getMainTable(), stagingTable); @@ -198,7 +214,7 @@ void testNontemporalDeltaValidationDateTimeFieldMissing() @Test public void testNontemporalDeltaPostActionSqlAndCleanStagingData() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -228,9 +244,9 @@ void testNontemporalDeltaWithNoVersionAndStagingFilter() public abstract void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult operations); @Test - void testNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup() + void testNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters() { - TestScenario testScenario = scenarios.MAX_VERSIONING_WITH_GREATER_THAN__DEDUP__WITH_STAGING_FILTER(); + TestScenario testScenario = scenarios.FILTER_DUPS__MAX_VERSION__WITH_STAGING_FILTER(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -238,15 +254,15 @@ void testNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup() .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(operations); + 
verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(operations); } - public abstract void verifyNontemporalDeltaWithMaxVersioningAndStagingFiltersWithDedup(GeneratorResult operations); + public abstract void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations); @Test - void testNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters() + void testNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilters() { - TestScenario testScenario = scenarios.MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITH_STAGING_FILTER(); + TestScenario testScenario = scenarios.NO_DEDUP__MAX_VERSION_WITHOUT_PERFORM__WITH_STAGING_FILTER(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -254,15 +270,15 @@ void testNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters() .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(operations); + verifyNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilters(operations); } - public abstract void verifyNontemporalDeltaWithMaxVersioningNoDedupAndStagingFilters(GeneratorResult operations); + public abstract void verifyNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilters(GeneratorResult operations); @Test - void testNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters() + void testNontemporalDeltaNoDedupMaxVersionWithoutPerform() { - TestScenario testScenario = scenarios.MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITHOUT_STAGING_FILTER(); + TestScenario testScenario = scenarios.NO_DEDUP__MAX_VERSION_WITHOUT_PERFORM(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -270,15 +286,15 @@ void 
testNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters() .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(operations); + verifyNontemporalDeltaNoDedupMaxVersionWithoutPerform(operations); } - public abstract void verifyNontemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations); + public abstract void verifyNontemporalDeltaNoDedupMaxVersionWithoutPerform(GeneratorResult operations); @Test - void testNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters() + void testNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase() { - TestScenario testScenario = scenarios.MAX_VERSIONING_WITH_GREATER_THAN_EQUAL__DEDUP__WITHOUT_STAGING_FILTER(); + TestScenario testScenario = scenarios.NO_DEDUP__MAX_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -287,10 +303,10 @@ void testNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutSta .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(operations); + verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(operations); } - public abstract void verifyNontemporalDeltaWithWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations); + public abstract void verifyNontemporalDeltaAllowDuplicatesMaxVersionWithUpperCase(GeneratorResult operations); public abstract RelationalSink getRelationalSink(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java index 979a71ccd76..695253d295e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java @@ -19,6 +19,8 @@ import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.ingestmode.NontemporalSnapshot; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -41,9 +43,9 @@ public abstract class NontemporalSnapshotTestCases extends BaseTest NontemporalSnapshotTestScenarios scenarios = new NontemporalSnapshotTestScenarios(); @Test - void testNontemporalSnapshotNoAuditingNoDataSplit() + void testNontemporalSnapshotNoAuditingNoDedupNoVersioning() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = 
RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -54,31 +56,15 @@ void testNontemporalSnapshotNoAuditingNoDataSplit() .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalSnapshotNoAuditingNoDataSplit(operations); + verifyNontemporalSnapshotNoAuditingNoDedupNoVersioning(operations); } - public abstract void verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult operations); + public abstract void verifyNontemporalSnapshotNoAuditingNoDedupNoVersioning(GeneratorResult operations); @Test - void testNontemporalSnapshotNoAuditingWithDataSplit() + void testNontemporalSnapshotWithAuditingFilterDupsNoVersioning() { - TestScenario testScenario = scenarios.NO_AUDTING__WITH_DATASPLIT(); - RelationalGenerator generator = RelationalGenerator.builder() - .ingestMode(testScenario.getIngestMode()) - .relationalSink(getRelationalSink()) - .collectStatistics(true) - .build(); - - GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalSnapshotNoAuditingWithDataSplit(operations); - } - - public abstract void verifyNontemporalSnapshotNoAuditingWithDataSplit(GeneratorResult operations); - - @Test - void testNontemporalSnapshotWithAuditingNoDataSplit() - { - TestScenario testScenario = scenarios.WITH_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.WITH_AUDTING__FILTER_DUPLICATES__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -87,15 +73,15 @@ void testNontemporalSnapshotWithAuditingNoDataSplit() .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalSnapshotWithAuditingNoDataSplit(operations); + verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(operations); } - public abstract void 
verifyNontemporalSnapshotWithAuditingNoDataSplit(GeneratorResult operations); + public abstract void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(GeneratorResult operations); @Test - void testNontemporalSnapshotWithAuditingWithDataSplit() + void testNontemporalSnapshotWithAuditingFailOnDupMaxVersion() { - TestScenario testScenario = scenarios.WITH_AUDTING__WITH_DATASPLIT(); + TestScenario testScenario = scenarios.WITH_AUDTING__FAIL_ON_DUP__MAX_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -104,15 +90,15 @@ void testNontemporalSnapshotWithAuditingWithDataSplit() .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); - verifyNontemporalSnapshotWithAuditingWithDataSplit(operations); + verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(operations); } - public abstract void verifyNontemporalSnapshotWithAuditingWithDataSplit(GeneratorResult operations); + public abstract void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorResult operations); @Test void testNontemporalSnapshotWithUpperCaseOptimizer() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -128,7 +114,7 @@ void testNontemporalSnapshotWithUpperCaseOptimizer() @Test void testNontemporalSnapshotWithLessColumnsInStaging() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); Dataset stagingTable = testScenario.getStagingTable().withSchema(baseTableShortenedSchema); Datasets datasets = Datasets.of(testScenario.getMainTable(), stagingTable); @@ -165,6 +151,23 @@ void 
testNontemporalSnapshotMandatoryDatasetMissing() } } + @Test + void testNontemporalSnapshotAllVersionValidation() + { + try + { + NontemporalSnapshot.builder() + .auditing(NoAuditing.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("xyz").build()) + .build(); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertEquals("Cannot build NontemporalSnapshot, AllVersionsStrategy not supported", e.getMessage()); + } + } + @Test void testNontemporalSnapshotDateTimeAuditingValidation() { @@ -184,7 +187,7 @@ void testNontemporalSnapshotDateTimeAuditingValidation() @Test public void testNontemporalSnapshotWithCleanStagingData() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -200,19 +203,19 @@ public void testNontemporalSnapshotWithCleanStagingData() @Test public void testNontemporalSnapshotWithDropStagingData() { - TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario testScenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Resources resources = Resources.builder().externalDatasetImported(true).build(); Planner planner = Planners.get(testScenario.getDatasets(), testScenario.getIngestMode(), options, getRelationalSink().capabilities()); RelationalTransformer transformer = new RelationalTransformer(getRelationalSink()); // post actions - LogicalPlan postActionsLogicalPlan = planner.buildLogicalPlanForPostActions(resources); - SqlPlan physicalPlanForPostActions = transformer.generatePhysicalPlan(postActionsLogicalPlan); - verifyNontemporalSnapshotWithDropStagingData(physicalPlanForPostActions); + LogicalPlan postCleanupLogicalPlan = 
planner.buildLogicalPlanForPostCleanup(resources); + SqlPlan physicalPlanForPostCleanup = transformer.generatePhysicalPlan(postCleanupLogicalPlan); + verifyNontemporalSnapshotWithDropStagingData(physicalPlanForPostCleanup); } - public abstract void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostActions); + public abstract void verifyNontemporalSnapshotWithDropStagingData(SqlPlan physicalPlanForPostCleanup); public abstract RelationalSink getRelationalSink(); } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java index 6725f7e0a5d..29aa39969cd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java @@ -20,7 +20,6 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; -import 
org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; import org.finos.legend.engine.persistence.components.scenarios.AppendOnlyScenarios; import org.finos.legend.engine.persistence.components.scenarios.TestScenario; import org.junit.jupiter.api.Assertions; @@ -33,9 +32,9 @@ public class AppendOnlyBasedDerivationTest AppendOnlyScenarios scenarios = new AppendOnlyScenarios(); @Test - void testAppendOnlyAllowDuplicatesNoAuditing() + void testAppendOnlyAllowDuplicatesNoAuditingNoVersioningNoFilterExistingRecords() { - TestScenario scenario = scenarios.ALLOW_DUPLICATES_NO_AUDITING(); + TestScenario scenario = scenarios.NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS(); assertDerivedMainDataset(scenario); AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertEquals("DIGEST", mode.digestField().get()); @@ -44,22 +43,10 @@ void testAppendOnlyAllowDuplicatesNoAuditing() } @Test - void testAppendOnlyAllowDuplicatesWithAuditing() + void testAppendOnlyFailOnDuplicatesWithAuditingAllVersionNoFilterExistingRecords() { - TestScenario scenario = scenarios.ALLOW_DUPLICATES_WITH_AUDITING(); - assertDerivedMainDataset(scenario); - AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); - Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); - DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); - Assertions.assertEquals("BATCH_UPDATE_TIME", auditing.dateTimeField()); - Assertions.assertTrue(mode.deduplicationStrategy() instanceof AllowDuplicates); - } - - @Test - void testAppendOnlyAllowDuplicatesWithAuditingWithDataSplit() - { - TestScenario scenario = scenarios.ALLOW_DUPLICATES_WITH_AUDITING__WITH_DATASPLIT(); + // Auditing column is a PK + TestScenario scenario = 
scenarios.WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS(); assertDerivedMainDataset(scenario); AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertEquals("DIGEST", mode.digestField().get()); @@ -67,79 +54,20 @@ void testAppendOnlyAllowDuplicatesWithAuditingWithDataSplit() Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); Assertions.assertEquals("BATCH_UPDATE_TIME", auditing.dateTimeField()); - Assertions.assertTrue(mode.deduplicationStrategy() instanceof AllowDuplicates); - } - - @Test - void testAppendOnlyFailOnDuplicatesNoAuditing() - { - TestScenario scenario = scenarios.FAIL_ON_DUPLICATES_NO_AUDITING(); - assertDerivedMainDataset(scenario); - AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); - Assertions.assertTrue(mode.auditing() instanceof NoAuditing); Assertions.assertTrue(mode.deduplicationStrategy() instanceof FailOnDuplicates); } @Test - void testAppendOnlyFailOnDuplicatesWithAuditing() + void testAppendOnlyAllowDuplicatesWithAuditingNoVersioningNoFilterExistingRecords() { - TestScenario scenario = scenarios.FAIL_ON_DUPLICATES_WITH_AUDITING(); + // Auditing column is not a PK + TestScenario scenario = scenarios.WITH_AUDITING__ALLOW_DUPLICATES__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS(); assertDerivedMainDataset(scenario); AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertEquals("DIGEST", mode.digestField().get()); Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); Assertions.assertEquals("BATCH_UPDATE_TIME", auditing.dateTimeField()); - Assertions.assertTrue(mode.deduplicationStrategy() 
instanceof FailOnDuplicates); - } - - @Test - void testAppendOnlyFilterDuplicatesNoAuditing() - { - TestScenario scenario = scenarios.FILTER_DUPLICATES_NO_AUDITING(); - assertDerivedMainDataset(scenario); - AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); - Assertions.assertTrue(mode.auditing() instanceof NoAuditing); - Assertions.assertTrue(mode.deduplicationStrategy() instanceof FilterDuplicates); - } - - @Test - void testAppendOnlyFilterDuplicatesNoAuditingWithDataSplit() - { - TestScenario scenario = scenarios.FILTER_DUPLICATES_NO_AUDITING_WITH_DATA_SPLIT(); - assertDerivedMainDataset(scenario); - AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); - Assertions.assertEquals("DATA_SPLIT", mode.dataSplitField().get()); - Assertions.assertTrue(mode.deduplicationStrategy() instanceof FilterDuplicates); - } - - @Test - void testAppendOnlyFilterDuplicatesWithAuditing() - { - TestScenario scenario = scenarios.FILTER_DUPLICATES_WITH_AUDITING(); - assertDerivedMainDataset(scenario); - AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); - Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); - DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); - Assertions.assertEquals("BATCH_UPDATE_TIME", auditing.dateTimeField()); - Assertions.assertTrue(mode.deduplicationStrategy() instanceof FilterDuplicates); - } - - @Test - void testAppendOnlyFilterDuplicatesWithAuditingWithDataSplit() - { - TestScenario scenario = scenarios.FILTER_DUPLICATES_WITH_AUDITING_WITH_DATA_SPLIT(); - assertDerivedMainDataset(scenario); - AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new 
IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); - Assertions.assertEquals("DATA_SPLIT", mode.dataSplitField().get()); - Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); - DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); - Assertions.assertEquals("BATCH_UPDATE_TIME", auditing.dateTimeField()); - Assertions.assertTrue(mode.deduplicationStrategy() instanceof FilterDuplicates); + Assertions.assertTrue(mode.deduplicationStrategy() instanceof AllowDuplicates); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalDeltaBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalDeltaBasedDerivationTest.java index 82880a126bc..8ad56b8c1c8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalDeltaBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalDeltaBasedDerivationTest.java @@ -34,7 +34,7 @@ public class NontemporalDeltaBasedDerivationTest @Test void testNontemporalDeltaNoAuditingNoDataSplit() { - TestScenario scenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario scenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); 
assertDerivedMainDataset(scenario); NontemporalDelta mode = (NontemporalDelta) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertEquals("DIGEST", mode.digestField()); @@ -44,7 +44,7 @@ void testNontemporalDeltaNoAuditingNoDataSplit() @Test void testNontemporalDeltaNoAuditingNoDataSplitWithDeleteInd() { - TestScenario scenario = scenarios.NO_AUDTING__NO_DATASPLIT__WITH_DELETE_INDICATOR(); + TestScenario scenario = scenarios.NO_AUDTING__WITH_DELETE_INDICATOR__NO_DEDUP__NO_VERSIONING(); assertDerivedMainDataset(scenario); NontemporalDelta mode = (NontemporalDelta) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertEquals("DIGEST", mode.digestField()); @@ -57,7 +57,7 @@ void testNontemporalDeltaNoAuditingNoDataSplitWithDeleteInd() @Test void testNontemporalDeltaNoAuditingWithDataSplit() { - TestScenario scenario = scenarios.NO_AUDTING__WITH_DATASPLIT(); + TestScenario scenario = scenarios.NO_AUDTING__NO_DEDUP__ALL_VERSION(); assertDerivedMainDataset(scenario); NontemporalDelta mode = (NontemporalDelta) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertTrue(mode.auditing() instanceof NoAuditing); @@ -68,7 +68,7 @@ void testNontemporalDeltaNoAuditingWithDataSplit() @Test void testNontemporalDeltaWithAuditingNoDataSplit() { - TestScenario scenario = scenarios.WITH_AUDTING__NO_DATASPLIT(); + TestScenario scenario = scenarios.WITH_AUDTING__FILTER_DUPLICATES__NO_VERSIONING(); assertDerivedMainDataset(scenario); NontemporalDelta mode = (NontemporalDelta) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); @@ -80,7 +80,7 @@ void testNontemporalDeltaWithAuditingNoDataSplit() @Test void testNontemporalSnapshotWithAuditingWithDataSplit() { - TestScenario scenario = scenarios.WITH_AUDTING__WITH_DATASPLIT(); + TestScenario scenario = 
scenarios.WITH_AUDTING__FAIL_ON_DUPS__ALL_VERSION(); assertDerivedMainDataset(scenario); NontemporalDelta mode = (NontemporalDelta) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalSnapshotBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalSnapshotBasedDerivationTest.java index c00bcb73d69..e2b8b8b9061 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalSnapshotBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/NontemporalSnapshotBasedDerivationTest.java @@ -31,28 +31,18 @@ public class NontemporalSnapshotBasedDerivationTest NontemporalSnapshotTestScenarios scenarios = new NontemporalSnapshotTestScenarios(); @Test - void testNontemporalSnapshotNoAuditingNoDataSplit() + void testNontemporalSnapshotNoAuditingNoDedupNoVersioning() { - TestScenario scenario = scenarios.NO_AUDTING__NO_DATASPLIT(); + TestScenario scenario = scenarios.NO_AUDTING__NO_DEDUP__NO_VERSIONING(); assertDerivedMainDataset(scenario); NontemporalSnapshot mode = (NontemporalSnapshot) scenario.getIngestMode().accept(new 
IngestModeCaseConverter(String::toUpperCase)); Assertions.assertTrue(mode.auditing() instanceof NoAuditing); } @Test - void testNontemporalSnapshotNoAuditingWithDataSplit() + void testNontemporalSnapshotWithAuditingFilterDupsNoVersioning() { - TestScenario scenario = scenarios.NO_AUDTING__WITH_DATASPLIT(); - assertDerivedMainDataset(scenario); - NontemporalSnapshot mode = (NontemporalSnapshot) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertTrue(mode.auditing() instanceof NoAuditing); - Assertions.assertEquals("DATA_SPLIT", mode.dataSplitField().get()); - } - - @Test - void testNontemporalSnapshotWithAuditingNoDataSplit() - { - TestScenario scenario = scenarios.WITH_AUDTING__NO_DATASPLIT(); + TestScenario scenario = scenarios.WITH_AUDTING__FILTER_DUPLICATES__NO_VERSIONING(); assertDerivedMainDataset(scenario); NontemporalSnapshot mode = (NontemporalSnapshot) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); @@ -61,15 +51,14 @@ void testNontemporalSnapshotWithAuditingNoDataSplit() } @Test - void testNontemporalSnapshotWithAuditingWithDataSplit() + void testNontemporalSnapshotWithAuditingFailOnDupMaxVersion() { - TestScenario scenario = scenarios.WITH_AUDTING__WITH_DATASPLIT(); + TestScenario scenario = scenarios.WITH_AUDTING__FAIL_ON_DUP__MAX_VERSION(); assertDerivedMainDataset(scenario); NontemporalSnapshot mode = (NontemporalSnapshot) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); Assertions.assertEquals("BATCH_UPDATE_TIME", auditing.dateTimeField()); - Assertions.assertEquals("DATA_SPLIT", mode.dataSplitField().get()); } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java index fd0a9b3593c..63a34113aa7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java @@ -39,9 +39,9 @@ public abstract class UnitmemporalDeltaBatchIdBasedTestCases extends BaseTest UnitemporalDeltaBatchIdBasedScenarios scenarios = new UnitemporalDeltaBatchIdBasedScenarios(); @Test - void testUnitemporalDeltaNoDeleteIndNoDataSplits() + void testUnitemporalDeltaNoDeleteIndNoDedupNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -51,15 +51,15 @@ void testUnitemporalDeltaNoDeleteIndNoDataSplits() .enableConcurrentSafety(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaNoDeleteIndNoAuditing(operations); + 
verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations); @Test - void testUnitemporalDeltaNoDeleteIndWithDataSplits() + void testUnitemporalDeltaNoDeleteIndNoDedupAllVersionsWithoutPerform() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__ALL_VERSION_WITHOUT_PERFORM(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -67,15 +67,15 @@ void testUnitemporalDeltaNoDeleteIndWithDataSplits() .collectStatistics(true) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyUnitemporalDeltaNoDeleteIndWithDataSplits(operations, dataSplitRangesOneToTwo); + verifyUnitemporalDeltaNoDeleteIndNoDedupAllVersionsWithoutPerform(operations, dataSplitRangesOneToTwo); } - public abstract void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges); + public abstract void verifyUnitemporalDeltaNoDeleteIndNoDedupAllVersionsWithoutPerform(List operations, List dataSplitRanges); @Test - void testUnitemporalDeltaWithDeleteIndNoDataSplits() + void testUnitemporalDeltaWithDeleteIndFilterDupsNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__FILTER_DUPS__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -83,15 +83,15 @@ void testUnitemporalDeltaWithDeleteIndNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = 
generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaWithDeleteIndNoDataSplits(operations); + verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(operations); } - public abstract void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithDeleteIndFilterDupsNoVersion(GeneratorResult operations); @Test - void testUnitemporalDeltaWithDeleteIndWithDataSplits() + void testUnitemporalDeltaWithDeleteIndNoDedupAllVersion() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__NO_DEDUP__ALL_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -99,15 +99,15 @@ void testUnitemporalDeltaWithDeleteIndWithDataSplits() .collectStatistics(true) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyUnitemporalDeltaWithDeleteIndWithDataSplits(operations, dataSplitRangesOneToTwo); + verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(operations, dataSplitRangesOneToTwo); } - public abstract void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges); + public abstract void verifyUnitemporalDeltaWithDeleteIndNoDedupAllVersion(List operations, List dataSplitRanges); @Test void testUnitemporalDeltaWithUpperCaseOptimizer() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -125,7 +125,7 @@ void testUnitemporalDeltaWithUpperCaseOptimizer() @Test void testUnitemporalDeltaWithCleanStagingData() { - TestScenario scenario = 
scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -139,9 +139,9 @@ void testUnitemporalDeltaWithCleanStagingData() public abstract void verifyUnitemporalDeltaWithCleanStagingData(GeneratorResult operations); @Test - void testUnitemporalDeltaNoDeleteIndNoDataSplitsWithOptimizationFilters() + void testUnitemporalDeltaNoDeleteIndWithOptimizationFilters() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATION_FILTERS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__WITH_OPTIMIZATION_FILTERS(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -149,15 +149,15 @@ void testUnitemporalDeltaNoDeleteIndNoDataSplitsWithOptimizationFilters() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFilters(operations); + verifyUnitemporalDeltaNoDeleteIndWithOptimizationFilters(operations); } - public abstract void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFilters(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaNoDeleteIndWithOptimizationFilters(GeneratorResult operations); @Test - void testUnitemporalDeltaNoDeleteIndNoDataSplitsWithOptimizationFiltersIncludesNullValues() + void testUnitemporalDeltaNoDeleteIndWithOptimizationFiltersIncludesNullValues() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATION_FILTERS__INCLUDES_NULL_VALUES(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__WITH_OPTIMIZATION_FILTERS__INCLUDES_NULL_VALUES(); RelationalGenerator generator = 
RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -165,10 +165,10 @@ void testUnitemporalDeltaNoDeleteIndNoDataSplitsWithOptimizationFiltersIncludesN .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFiltersIncludesNullValues(operations); + verifyUnitemporalDeltaNoDeleteIndWithOptimizationFiltersIncludesNullValues(operations); } - public abstract void verifyUnitemporalDeltaNoDeleteIndNoAuditingWithOptimizationFiltersIncludesNullValues(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaNoDeleteIndWithOptimizationFiltersIncludesNullValues(GeneratorResult operations); @Test void testUnitemporalDeltaValidationBatchIdOutMissing() @@ -197,7 +197,7 @@ void testUnitemporalDeltaValidationBatchIdOutMissing() @Test void testUnitemporalDeltaValidationBatchIdInNotPrimaryKey() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -222,7 +222,7 @@ void testUnitemporalDeltaValidationBatchIdInNotPrimaryKey() @Test void testUnitemporalDeltaValidationOptimizationColumnsNotPresent() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_MISSING_OPTIMIZATION_FILTER(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__WITH_MISSING_OPTIMIZATION_FILTER(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -243,7 +243,7 @@ void testUnitemporalDeltaValidationOptimizationColumnsNotPresent() @Test void testUnitemporalDeltaValidationOptimizationColumnUnsupportedDataType() { - TestScenario scenario = 
scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS__WITH_OPTIMIZATION_FILTER_UNSUPPORTED_DATATYPE(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__WITH_OPTIMIZATION_FILTER_UNSUPPORTED_DATATYPE(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -278,9 +278,9 @@ void testUnitemporalDeltaWithNoVersioningAndStagingFilters() public abstract void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult operations); @Test - void testUnitemporalDeltaWithMaxVersioningDedupEnabledAndStagingFiltersWithDedup() + void testUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter() { - TestScenario scenario = scenarios.BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__DEDUP__WITH_STAGING_FILTER(); + TestScenario scenario = scenarios.BATCH_ID_BASED__FILTER_DUPS__MAX_VERSION__WITH_STAGING_FILTER(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) @@ -289,15 +289,15 @@ void testUnitemporalDeltaWithMaxVersioningDedupEnabledAndStagingFiltersWithDedup .cleanupStagingData(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - this.verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(operations); + this.verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(operations); } - public abstract void verifyUnitemporalDeltaWithMaxVersionDedupEnabledAndStagingFilter(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations); @Test - void testUnitemporalDeltaWithMaxVersioningNoDedupAndStagingFilters() + void testUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters() { - TestScenario scenario = scenarios.BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITH_STAGING_FILTER(); + TestScenario scenario = 
scenarios.BATCH_ID_BASED__NO_DEDUP__MAX_VERSION_WITHOUT_PERFORM__WITH_STAGING_FILTER(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) @@ -307,15 +307,15 @@ void testUnitemporalDeltaWithMaxVersioningNoDedupAndStagingFilters() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - this.verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(operations); + this.verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(operations); } - public abstract void verifyUnitemporalDeltaWithMaxVersionNoDedupAndStagingFilter(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(GeneratorResult operations); @Test - void testUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters() + void testUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform() { - TestScenario scenario = scenarios.BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN__NO_DEDUP__WITHOUT_STAGING_FILTER(); + TestScenario scenario = scenarios.BATCH_ID_BASED__FAIL_ON_DUPS__MAX_VERSIONING_WITHOUT_PERFORM__NO_STAGING_FILTER(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) @@ -324,15 +324,15 @@ void testUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters() .cleanupStagingData(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - this.verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(operations); + this.verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(operations); } - public abstract void verifyUnitemporalDeltaWithMaxVersioningNoDedupWithoutStagingFilters(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithFailOnDupsMaxVersioningWithoutPerform(GeneratorResult operations); @Test - void 
testUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters() + void testUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters() { - TestScenario scenario = scenarios.BATCH_ID_BASED__MAX_VERSIONING_WITH_GREATER_THAN_EQUAL__DEDUP__WITHOUT_STAGING_FILTER(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEDUP__MAX_VERSIONING__NO_STAGING_FILTER(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) @@ -342,10 +342,10 @@ void testUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWithoutStaging .caseConversion(CaseConversion.TO_UPPER) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - this.verifyUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(operations); + this.verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(operations); } - public abstract void verifyUnitemporalDeltaWithMaxVersioningDedupEnabledAndUpperCaseWithoutStagingFilters(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithNoDedupMaxVersioningAndUpperCaseWithoutStagingFilters(GeneratorResult operations); public abstract RelationalSink getRelationalSink(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java index 15a203fe743..47aa50fbc7b 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdDateTimeBasedTestCases.java @@ -38,9 +38,9 @@ public abstract class UnitmemporalDeltaBatchIdDateTimeBasedTestCases extends Bas UnitemporalDeltaBatchIdDateTimeBasedScenarios scenarios = new UnitemporalDeltaBatchIdDateTimeBasedScenarios(); @Test - void testUnitemporalDeltaNoDeleteIndNoDataSplits() + void testUnitemporalDeltaNoDeleteIndNoDedupNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -48,15 +48,15 @@ void testUnitemporalDeltaNoDeleteIndNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaNoDeleteIndNoAuditing(operations); + verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult operations); @Test - void testUnitemporalDeltaNoDeleteIndWithDataSplits() + void testUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = 
scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__FILTER_DUPS__ALL_VERSION_WITHOUT_PERFORM(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -64,15 +64,15 @@ void testUnitemporalDeltaNoDeleteIndWithDataSplits() .collectStatistics(true) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyUnitemporalDeltaNoDeleteIndWithDataSplits(operations, dataSplitRangesOneToTwo); + verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(operations, dataSplitRangesOneToTwo); } - public abstract void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges); + public abstract void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform(List operations, List dataSplitRanges); @Test - void testUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits() + void testUnitemporalDeltaWithDeleteIndMultiValuesNoDedupNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND_MULTI_VALUES__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND_MULTI_VALUES__NO_DEDUP_NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -80,15 +80,15 @@ void testUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(operations); + verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDedupNoVersion(GeneratorResult operations); @Test - void 
testUnitemporalDeltaWithDeleteIndNoDataSplits() + void testUnitemporalDeltaWithDeleteInd() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -96,15 +96,15 @@ void testUnitemporalDeltaWithDeleteIndNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaWithDeleteIndNoDataSplits(operations); + verifyUnitemporalDeltaWithDeleteInd(operations); } - public abstract void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithDeleteInd(GeneratorResult operations); @Test - void testUnitemporalDeltaWithDeleteIndWithDataSplits() + void testUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__FAIL_ON_DUP__ALL_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -112,15 +112,15 @@ void testUnitemporalDeltaWithDeleteIndWithDataSplits() .collectStatistics(true) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyUnitemporalDeltaWithDeleteIndWithDataSplits(operations, dataSplitRangesOneToTwo); + verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(operations, dataSplitRangesOneToTwo); } - public abstract void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges); + public abstract void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List operations, 
List dataSplitRanges); @Test void testUnitemporalDeltaWithUpperCaseOptimizer() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -138,7 +138,7 @@ void testUnitemporalDeltaWithUpperCaseOptimizer() @Test void testUnitemporalDeltaWithLessColumnsInStaging() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); Dataset stagingDataset = scenario.getStagingTable().withSchema(stagingTableSchemaWithLimitedColumns); Datasets datasets = Datasets.of(scenario.getMainTable(), stagingDataset); @@ -157,7 +157,7 @@ void testUnitemporalDeltaWithLessColumnsInStaging() @Test void testUnitemporalDeltaWithPlaceholders() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -176,7 +176,7 @@ void testUnitemporalDeltaWithPlaceholders() @Test void testUnitemporalDeltaWithOnlySchemaSet() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); Dataset mainTable = getMainDatasetWithOnlySchemaSet(scenario.getMainTable().schema()); Dataset stagingTable = getStagingDatasetWithOnlySchemaSet(scenario.getStagingTable().schema()); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -197,7 +197,7 @@ void testUnitemporalDeltaWithOnlySchemaSet() @Test void 
testUnitemporalDeltaWithDbAndSchemaBothSet() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); Dataset mainTable = getMainDatasetWithDbAndSchemaBothSet(scenario.getMainTable().schema()); Dataset stagingTable = getStagingDatasetWithDbAndSchemaBothSet(scenario.getStagingTable().schema()); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -218,7 +218,7 @@ void testUnitemporalDeltaWithDbAndSchemaBothSet() @Test void testUnitemporalDeltaWithDbAndSchemaBothNotSet() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); Dataset mainTable = getMainDatasetWithDbAndSchemaBothNotSet(scenario.getMainTable().schema()); Dataset stagingTable = getStagingDatasetWithDbAndSchemaBothNotSet(scenario.getStagingTable().schema()); Datasets datasets = Datasets.of(mainTable, stagingTable); @@ -239,7 +239,7 @@ void testUnitemporalDeltaWithDbAndSchemaBothNotSet() @Test void testUnitemporalDeltaWithCleanStagingData() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -357,7 +357,7 @@ void testUnitemporalDeltaValidationDeleteIndicatorValuesMissing() @Test void testUnitemporalDeltaValidationBatchIdInNotPrimaryKey() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) diff 
--git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java index 548fce115fa..cf3d7e63ca1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaDateTimeBasedTestCases.java @@ -36,9 +36,9 @@ public abstract class UnitmemporalDeltaDateTimeBasedTestCases extends BaseTest UnitemporalDeltaDateTimeBasedScenarios scenarios = new UnitemporalDeltaDateTimeBasedScenarios(); @Test - void testUnitemporalDeltaNoDeleteIndNoDataSplits() + void testUnitemporalDeltaNoDeleteIndNoDedupNoVersioning() { - TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -46,15 +46,15 @@ void testUnitemporalDeltaNoDeleteIndNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaNoDeleteIndNoAuditing(operations); + 
verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(operations); } - public abstract void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(GeneratorResult operations); @Test - void testUnitemporalDeltaNoDeleteIndWithDataSplits() + void testUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform() { - TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__FAIL_ON_DUPS__ALL_VERSION_WITHOUT_PERFORM(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -62,15 +62,15 @@ void testUnitemporalDeltaNoDeleteIndWithDataSplits() .collectStatistics(true) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyUnitemporalDeltaNoDeleteIndWithDataSplits(operations, dataSplitRangesOneToTwo); + verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(operations, dataSplitRangesOneToTwo); } - public abstract void verifyUnitemporalDeltaNoDeleteIndWithDataSplits(List operations, List dataSplitRanges); + public abstract void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform(List operations, List dataSplitRanges); @Test - void testUnitemporalDeltaWithDeleteIndNoDataSplits() + void testUnitemporalDeltaWithDeleteIndNoDedupNoVersioning() { - TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -78,15 +78,15 @@ void testUnitemporalDeltaWithDeleteIndNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = 
generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalDeltaWithDeleteIndNoDataSplits(operations); + verifyUnitemporalDeltaWithDeleteIndNoDedupNoVersioning(operations); } - public abstract void verifyUnitemporalDeltaWithDeleteIndNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalDeltaWithDeleteIndNoDedupNoVersioning(GeneratorResult operations); @Test - void testUnitemporalDeltaWithDeleteIndWithDataSplits() + void testUnitemporalDeltaWithDeleteIndFilterDupsAllVersion() { - TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__FILTER_DUPS__ALL_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -94,15 +94,15 @@ void testUnitemporalDeltaWithDeleteIndWithDataSplits() .collectStatistics(true) .build(); List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); - verifyUnitemporalDeltaWithDeleteIndWithDataSplits(operations, dataSplitRangesOneToTwo); + verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(operations, dataSplitRangesOneToTwo); } - public abstract void verifyUnitemporalDeltaWithDeleteIndWithDataSplits(List operations, List dataSplitRanges); + public abstract void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List operations, List dataSplitRanges); @Test void testUnitemporalDeltaWithUpperCaseOptimizer() { - TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -119,7 +119,7 @@ void testUnitemporalDeltaWithUpperCaseOptimizer() @Test void testUnitemporalDeltaWithCleanStagingData() { - TestScenario scenario 
= scenarios.DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -155,7 +155,7 @@ void testUnitemporalDeltaValidationBatchTimeInMissing() @Test void testUnitemporalDeltaValidationBatchTimeInNotPrimaryKey() { - TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSIONING(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java index 1930004c250..29617a200a8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java @@ -16,8 +16,11 @@ import org.finos.legend.engine.persistence.components.BaseTest; import 
org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.ingestmode.NontemporalSnapshot; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; +import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -36,9 +39,9 @@ public abstract class UnitmemporalSnapshotBatchIdBasedTestCases extends BaseTest UnitemporalSnapshotBatchIdBasedScenarios scenarios = new UnitemporalSnapshotBatchIdBasedScenarios(); @Test - void testUnitemporalSnapshotWithoutPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionNoDedupNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -48,15 +51,34 @@ void testUnitemporalSnapshotWithoutPartitionNoDataSplits() .enableConcurrentSafety(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(operations); + verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations); + + @Test + void 
testUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion() + { + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__FAIL_ON_DUPS__NO_VERSION(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .createStagingDataset(true) + .enableConcurrentSafety(true) + .build(); + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(operations); + } + + public abstract void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsNoVersion(GeneratorResult operations); + @Test void testUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -72,7 +94,7 @@ void testUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling() @Test void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -87,9 +109,9 @@ void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() public abstract void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithPartitionNoDataSplits() + void testUnitemporalSnapshotWithPartitionNoDedupNoVersion() { - TestScenario scenario = 
scenarios.BATCH_ID_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -97,15 +119,15 @@ void testUnitemporalSnapshotWithPartitionNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionNoDataSplits(operations); + verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() + void testUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -113,15 +135,15 @@ void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(operations); + verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations); @Test void testUnitemporalSnapshotWithCleanStagingData() { - TestScenario scenario = 
scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -160,7 +182,7 @@ void testUnitemporalSnasphotValidationBatchIdInMissing() @Test void testUnitemporalSnapshotValidationBatchIdInNotPrimaryKey() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -197,6 +219,27 @@ void testUnitemporalSnapshotValidationMainDatasetMissing() } } + @Test + void testUnitemporalSnapshotAllVersionValidation() + { + try + { + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .build()) + .versioningStrategy(AllVersionsStrategy.builder().versioningField("xyz").build()) + .build(); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertEquals("Cannot build UnitemporalSnapshot, AllVersionsStrategy not supported", e.getMessage()); + } + } + public abstract RelationalSink getRelationalSink(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java index f012b3bec1a..3bde27e3b8c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java @@ -38,9 +38,9 @@ public abstract class UnitmemporalSnapshotBatchIdDateTimeBasedTestCases extends UnitemporalSnapshotBatchIdDateTimeBasedScenarios scenarios = new UnitemporalSnapshotBatchIdDateTimeBasedScenarios(); @Test - void testUnitemporalSnapshotWithoutPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -48,15 +48,31 @@ void testUnitemporalSnapshotWithoutPartitionNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(operations); + verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning(operations); } - public abstract void 
verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning(GeneratorResult operations); + + @Test + void testUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion() + { + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__MAX_VERSION(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .build(); + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(operations); + } + + public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(GeneratorResult operations); @Test void testUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -70,9 +86,9 @@ void testUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandli public abstract void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() + void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__FILTER_DUPS__MAX_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() 
.ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -81,15 +97,15 @@ void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() .caseConversion(CaseConversion.TO_UPPER) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(operations); + verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion(operations); } - public abstract void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilterDupsMaxVersion(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithPartitionNoDataSplits() + void testUnitemporalSnapshotWithPartitionNoDedupNoVersioning() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -97,15 +113,15 @@ void testUnitemporalSnapshotWithPartitionNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionNoDataSplits(operations); + verifyUnitemporalSnapshotWithPartitionNoDedupNoVersioning(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersioning(GeneratorResult operations); @Test void testUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = 
scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -121,7 +137,7 @@ void testUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling() @Test void testUnitemporalSnapshotWithPartitionWithNoOpEmptyBatchHandling() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) .transactionMilestoning(BatchIdAndDateTime.builder() @@ -145,9 +161,9 @@ void testUnitemporalSnapshotWithPartitionWithNoOpEmptyBatchHandling() } @Test - void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() + void testUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -155,15 +171,15 @@ void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(operations); + verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning(GeneratorResult operations); @Test void testUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHandling() { - 
TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -179,7 +195,7 @@ void testUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHan @Test void testUnitemporalSnapshotWithPartitionFiltersWithNoOpEmptyDataHandling() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) .transactionMilestoning(BatchIdAndDateTime.builder() @@ -206,7 +222,7 @@ void testUnitemporalSnapshotWithPartitionFiltersWithNoOpEmptyDataHandling() @Test void testUnitemporalSnapshotWithCleanStagingData() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -223,7 +239,7 @@ void testUnitemporalSnapshotWithCleanStagingData() @Test void testUnitemporalSnapshotWithLessColumnsInStaging() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); Dataset stagingDataset = scenario.getStagingTable().withSchema(stagingTableSchemaWithLimitedColumns); Datasets datasets = Datasets.of(scenario.getMainTable(), stagingDataset); @@ -242,7 +258,7 @@ void testUnitemporalSnapshotWithLessColumnsInStaging() @Test void 
testUnitemporalSnapshotWithPlaceholders() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -282,7 +298,7 @@ void testUnitemporalSnasphotValidationBatchIdInMissing() @Test void testUnitemporalSnapshotValidationBatchIdInNotPrimaryKey() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -331,7 +347,7 @@ void testUnitemporalSnapshotPartitionKeysValidation() @Test void testUnitemporalSnapshotFailOnEmptyBatch() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() .digestField(digestField) .transactionMilestoning(BatchIdAndDateTime.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java index bd79faf2246..13bc1fca81d 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java @@ -36,9 +36,9 @@ public abstract class UnitmemporalSnapshotDateTimeBasedTestCases extends BaseTes UnitemporalSnapshotDateTimeBasedScenarios scenarios = new UnitemporalSnapshotDateTimeBasedScenarios(); @Test - void testUnitemporalSnapshotWithoutPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionNoDedupNoVersion() { - TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -46,15 +46,32 @@ void testUnitemporalSnapshotWithoutPartitionNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(operations); + verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations); + + @Test + void testUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion() + { + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__FAIL_ON_DUP__MAX_VERSION(); + RelationalGenerator generator = 
RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .build(); + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(operations); + } + + public abstract void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(GeneratorResult operations); + @Test void testUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling() { - TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -70,7 +87,7 @@ void testUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling() @Test void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() { - TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -85,9 +102,9 @@ void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() public abstract void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithPartitionNoDataSplits() + void testUnitemporalSnapshotWithPartitionNoDedupNoVersion() { - TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) 
.relationalSink(getRelationalSink()) @@ -95,15 +112,15 @@ void testUnitemporalSnapshotWithPartitionNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionNoDataSplits(operations); + verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() + void testUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion() { - TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) @@ -111,15 +128,15 @@ void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(operations); + verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(GeneratorResult operations); @Test void testUnitemporalSnapshotWithCleanStagingData() { - TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) 
.relationalSink(getRelationalSink()) @@ -158,7 +175,7 @@ void testUnitemporalSnasphotValidationBatchTimeInMissing() @Test void testUnitemporalSnapshotValidationBatchTimeInNotPrimaryKey() { - TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdBasedDerivationTest.java index acf6fe217a0..20621a15953 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdBasedDerivationTest.java @@ -28,28 +28,28 @@ public class UnitemporalDeltaBatchIdBasedDerivationTest @Test void testUnitemporalDeltaNoDeleteIndNoDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); 
assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaNoDeleteIndWithDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DEDUP__ALL_VERSION_WITHOUT_PERFORM(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaWithDeleteIndNoDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__FILTER_DUPS__NO_VERSIONING(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaWithDeleteIndWithDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_DEL_IND__NO_DEDUP__ALL_VERSION(); assertDerivedMainDataset(scenario); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdDateTimeBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdDateTimeBasedDerivationTest.java index 3fb34706ea5..4d760207656 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdDateTimeBasedDerivationTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaBatchIdDateTimeBasedDerivationTest.java @@ -28,28 +28,28 @@ public class UnitemporalDeltaBatchIdDateTimeBasedDerivationTest @Test void testUnitemporalDeltaNoDeleteIndNoDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaNoDeleteIndWithDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__NO_DEL_IND__FILTER_DUPS__ALL_VERSION_WITHOUT_PERFORM(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaWithDeleteIndNoDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaWithDeleteIndWithDataSplits() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_DEL_IND__FAIL_ON_DUP__ALL_VERSION(); assertDerivedMainDataset(scenario); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaDateTimeBasedDerivationTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaDateTimeBasedDerivationTest.java index ffd318b803b..3380781c9e4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaDateTimeBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalDeltaDateTimeBasedDerivationTest.java @@ -28,28 +28,28 @@ public class UnitemporalDeltaDateTimeBasedDerivationTest @Test void testUnitemporalDeltaNoDeleteIndNoDataSplits() { - TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DEDUP__NO_VERSIONING(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaNoDeleteIndWithDataSplits() { - TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__FAIL_ON_DUPS__ALL_VERSION_WITHOUT_PERFORM(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaWithDeleteIndNoDataSplits() { - TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } @Test void testUnitemporalDeltaWithDeleteIndWithDataSplits() { - TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS(); + TestScenario scenario = 
scenarios.DATETIME_BASED__WITH_DEL_IND__FILTER_DUPS__ALL_VERSION(); assertDerivedMainDataset(scenario); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdBasedDerivationTest.java index 75d4d978f8f..905e3c2faa8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdBasedDerivationTest.java @@ -26,23 +26,30 @@ public class UnitemporalSnapshotBatchIdBasedDerivationTest UnitemporalSnapshotBatchIdBasedScenarios scenarios = new UnitemporalSnapshotBatchIdBasedScenarios(); @Test - void testUnitemporalSnapshotWithoutPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionNoDedupNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } @Test - void testUnitemporalSnapshotWithPartitionNoDataSplits() + void testUnitemporalSnapshotWithPartitionNoDedupNoVersion() { - TestScenario scenario = 
scenarios.BATCH_ID_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } @Test - void testUnitemporalSnapshotWithPartitionFilterNoDataSplits() + void testUnitemporalSnapshotWithPartitionFailOnDupsNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__FAIL_ON_DUPS__NO_VERSION(); + assertDerivedMainDataset(scenario); + } + + @Test + void testUnitemporalSnapshotWithPartitionFilterNoDedupNoVersion() + { + TestScenario scenario = scenarios.BATCH_ID_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdDateTimeBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdDateTimeBasedDerivationTest.java index 2dce5f71f0c..f6bd9419e32 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdDateTimeBasedDerivationTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotBatchIdDateTimeBasedDerivationTest.java @@ -26,23 +26,37 @@ public class UnitemporalSnapshotBatchIdDateTimeBasedDerivationTest UnitemporalSnapshotBatchIdDateTimeBasedScenarios scenarios = new UnitemporalSnapshotBatchIdDateTimeBasedScenarios(); @Test - void testUnitemporalSnapshotWithoutPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionNoDedupNoVersion() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } @Test - void testUnitemporalSnapshotWithPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__MAX_VERSION(); assertDerivedMainDataset(scenario); } @Test - void testUnitemporalSnapshotWithPartitionFilterNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionFilterDupsMaxVersion() { - TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__FILTER_DUPS__MAX_VERSION(); + assertDerivedMainDataset(scenario); + } + + @Test + void testUnitemporalSnapshotWithPartitionNoDedupNoVersion() + { + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); + assertDerivedMainDataset(scenario); + } + + @Test + void testUnitemporalSnapshotWithPartitionFilterNoDedupNoVersion() + { + TestScenario scenario = 
scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotDateTimeBasedDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotDateTimeBasedDerivationTest.java index 5e7e4b817a6..0194e70c58e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotDateTimeBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/derivation/UnitemporalSnapshotDateTimeBasedDerivationTest.java @@ -26,23 +26,30 @@ public class UnitemporalSnapshotDateTimeBasedDerivationTest UnitemporalSnapshotDateTimeBasedScenarios scenarios = new UnitemporalSnapshotDateTimeBasedScenarios(); @Test - void testUnitemporalSnapshotWithoutPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionNoDedupNoVersion() { - TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } @Test - void testUnitemporalSnapshotWithPartitionNoDataSplits() + void testUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion() { - 
TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__FAIL_ON_DUP__MAX_VERSION(); assertDerivedMainDataset(scenario); } @Test - void testUnitemporalSnapshotWithPartitionFilterNoDataSplits() + void testUnitemporalSnapshotWithPartitionNoDedupNoVersion() { - TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITIONS__NO_DEDUP__NO_VERSION(); + assertDerivedMainDataset(scenario); + } + + @Test + void testUnitemporalSnapshotWithPartitionFilterNoDedupNoVersion() + { + TestScenario scenario = scenarios.DATETIME_BASED__WITH_PARTITION_FILTER__NO_DEDUP__NO_VERSION(); assertDerivedMainDataset(scenario); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/extension/PersistenceTestRunner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/extension/PersistenceTestRunner.java index 4c20e8cdee6..68989b4fdc9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/extension/PersistenceTestRunner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/extension/PersistenceTestRunner.java @@ -194,7 +194,7 @@ private IngestorResult invokePersistence(Dataset targetDataset, Persistence pers .enableSchemaEvolution(SCHEMA_EVOLUTION_DEFAULT) .build(); - IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(connection), enrichedDatasets); + IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(connection), enrichedDatasets).get(0); return result; } @@ -212,7 +212,7 @@ private IngestorResult 
invokePersistence(Dataset targetDataset, ServiceOutputTar .enableSchemaEvolution(SCHEMA_EVOLUTION_DEFAULT) .build(); - IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(connection), enrichedDatasets); + IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(connection), enrichedDatasets).get(0); return result; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java index 0ba21b06a7b..7e97be5066e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java @@ -30,12 +30,9 @@ public class AppendOnlyMapper { public static org.finos.legend.engine.persistence.components.ingestmode.AppendOnly from(AppendOnly appendOnly) { - DeduplicationStrategy deduplicationStrategy = appendOnly.filterDuplicates ? 
- FilterDuplicates.builder().build() : AllowDuplicates.builder().build(); - return org.finos.legend.engine.persistence.components.ingestmode.AppendOnly.builder() .digestField(DIGEST_FIELD_DEFAULT) - .deduplicationStrategy(deduplicationStrategy) + .filterExistingRecords(appendOnly.filterDuplicates) .auditing(appendOnly.auditing.accept(MappingVisitors.MAP_TO_COMPONENT_AUDITING)) .build(); } @@ -47,22 +44,16 @@ public static org.finos.legend.engine.persistence.components.ingestmode.AppendOn { temporality.auditing = new NoAuditing(); } + boolean filterExistingRecords = false; org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.AppendOnly appendOnlyHandling = (org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.AppendOnly) temporality.updatesHandling; if (appendOnlyHandling.appendStrategy instanceof org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.appendStrategy.FilterDuplicates) { - deduplicationStrategy = FilterDuplicates.builder().build(); - } - else if (appendOnlyHandling.appendStrategy instanceof org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.appendStrategy.FailOnDuplicates) - { - deduplicationStrategy = FailOnDuplicates.builder().build(); - } - else - { - deduplicationStrategy = AllowDuplicates.builder().build(); + filterExistingRecords = true; } + return org.finos.legend.engine.persistence.components.ingestmode.AppendOnly.builder() .digestField(DIGEST_FIELD_DEFAULT) - .deduplicationStrategy(deduplicationStrategy) + .filterExistingRecords(filterExistingRecords) .auditing(temporality.auditing.accept(org.finos.legend.engine.testable.persistence.mapper.v2.MappingVisitors.MAP_TO_COMPONENT_NONTEMPORAL_AUDITING)) .build(); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java index 20482a67222..aeb17585d55 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java @@ -82,7 +82,8 @@ public Datasets visit(AppendOnly appendOnly) { enrichMainSchemaWithDigest(); } - appendOnly.auditing.accept(new MappingVisitors.EnrichSchemaWithAuditing(mainSchemaDefinitionBuilder, baseSchema)); + boolean baseSchemaHasPks = baseSchema.fields().stream().anyMatch(field -> field.primaryKey()); + appendOnly.auditing.accept(new MappingVisitors.EnrichSchemaWithAuditing(mainSchemaDefinitionBuilder, baseSchema, baseSchemaHasPks)); Dataset enrichedMainDataset = mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.build()).build(); return Datasets.of(enrichedMainDataset, stagingDataset); @@ -123,7 +124,7 @@ public Datasets visit(NontemporalDelta nontemporalDelta) Dataset stagingDataset = stagingDatasetBuilder.schema(stagingSchemaDefinitionBuilder.build()).build(); enrichMainSchemaWithDigest(); - nontemporalDelta.auditing.accept(new MappingVisitors.EnrichSchemaWithAuditing(mainSchemaDefinitionBuilder, baseSchema)); + nontemporalDelta.auditing.accept(new MappingVisitors.EnrichSchemaWithAuditing(mainSchemaDefinitionBuilder, baseSchema, true)); Dataset enrichedMainDataset = mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.build()).build(); return Datasets.of(enrichedMainDataset, stagingDataset); @@ -133,8 +134,8 @@ public Datasets visit(NontemporalDelta 
nontemporalDelta) public Datasets visit(NontemporalSnapshot nontemporalSnapshot) { Dataset stagingDataset = stagingDatasetBuilder.schema(stagingSchemaDefinitionBuilder.build()).build(); - - nontemporalSnapshot.auditing.accept(new MappingVisitors.EnrichSchemaWithAuditing(mainSchemaDefinitionBuilder, baseSchema)); + boolean baseSchemaHasPks = baseSchema.fields().stream().anyMatch(field -> field.primaryKey()); + nontemporalSnapshot.auditing.accept(new MappingVisitors.EnrichSchemaWithAuditing(mainSchemaDefinitionBuilder, baseSchema, baseSchemaHasPks)); Dataset enrichedMainDataset = mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.build()).build(); return Datasets.of(enrichedMainDataset, stagingDataset); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/MappingVisitors.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/MappingVisitors.java index ead0fbc76f0..0b61881f9ab 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/MappingVisitors.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/MappingVisitors.java @@ -154,10 +154,13 @@ public static class EnrichSchemaWithAuditing implements AuditingVisitor private SchemaDefinition.Builder schemaDefinitionBuilder; private SchemaDefinition baseSchema; - public EnrichSchemaWithAuditing(SchemaDefinition.Builder schemaDefinitionBuilder, SchemaDefinition baseSchema) + private boolean isPk; + + public EnrichSchemaWithAuditing(SchemaDefinition.Builder schemaDefinitionBuilder, SchemaDefinition baseSchema, boolean isPk) { this.schemaDefinitionBuilder = schemaDefinitionBuilder; this.baseSchema = baseSchema; + this.isPk = isPk; } 
@Override @@ -175,6 +178,7 @@ public Void visit(DateTimeAuditing auditing) Field auditDateTime = Field.builder() .name(auditing.dateTimeName) .type(FieldType.of(DataType.TIMESTAMP, Optional.empty(), Optional.empty())) + .primaryKey(isPk) .build(); schemaDefinitionBuilder.addFields(auditDateTime); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithFilterDuplicates.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithFilterDuplicates.java index 36eef2df357..a28c89e100c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithFilterDuplicates.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithFilterDuplicates.java @@ -25,18 +25,6 @@ public class TestAppendOnlyWithFilterDuplicates extends TestPersistenceBase { - @Test - public void testAppendOnlyWithNoAuditing() throws Exception - { - String path = "src/test/resources/append-only/filter_duplicates/persistence_no_audit.txt"; - String persistenceSpec = readPureCode(path); - TestResult result = testPersistence(persistenceSpec).results.get(0); - - assertTrue(result instanceof TestExecuted); - Assert.assertEquals(TestExecutionStatus.PASS, ((TestExecuted) result).testExecutionStatus); - Assert.assertEquals("test::TestPersistence", result.testable); - } - @Test public void testAppendOnlyWithDateTimeAuditing() throws Exception { @@ -50,19 +38,6 @@ public void testAppendOnlyWithDateTimeAuditing() throws Exception } // v2 tests - - @Test - public void testAppendOnlyWithNoAuditingV2() throws Exception - { - String path = 
"src/test/resources/v2/append-only/filter_duplicates/persistence_no_audit.txt"; - String persistenceSpec = readPureCode(path); - TestResult result = testPersistence(persistenceSpec).results.get(0); - - assertTrue(result instanceof TestExecuted); - Assert.assertEquals(TestExecutionStatus.PASS, ((TestExecuted) result).testExecutionStatus); - Assert.assertEquals("test::TestPersistence", result.testable); - } - @Test public void testAppendOnlyWithDateTimeAuditingV2() throws Exception { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java index 077e8bfa41f..0d6245a757d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java @@ -84,7 +84,7 @@ public void testMapperForAppendOnly() throws Exception AppendOnly appendOnly = (AppendOnly) componentIngestMode; Assert.assertEquals("DIGEST", appendOnly.digestField().get()); Assert.assertTrue(appendOnly.auditing() instanceof NoAuditing); - Assert.assertTrue(appendOnly.deduplicationStrategy() instanceof AllowDuplicates); + Assert.assertFalse(appendOnly.filterExistingRecords()); ingestMode = getAppendOnlyNoAuditingWithFilteringDuplicates(); persistence = getPersistence(ingestMode); @@ -95,7 +95,7 @@ public void testMapperForAppendOnly() throws Exception appendOnly = (AppendOnly) componentIngestMode; Assert.assertEquals("DIGEST", appendOnly.digestField().get()); Assert.assertTrue(appendOnly.auditing() instanceof NoAuditing); - 
Assert.assertTrue(appendOnly.deduplicationStrategy() instanceof FilterDuplicates); + Assert.assertTrue(appendOnly.filterExistingRecords()); ingestMode = getAppendOnlyDatetimeAuditingNoFilteringDuplicates(); persistence = getPersistence(ingestMode); @@ -108,7 +108,7 @@ public void testMapperForAppendOnly() throws Exception Assert.assertTrue(appendOnly.auditing() instanceof DateTimeAuditing); DateTimeAuditing dateTimeAuditing = (DateTimeAuditing) appendOnly.auditing(); Assert.assertEquals("AUDIT_TIME", dateTimeAuditing.dateTimeField()); - Assert.assertTrue(appendOnly.deduplicationStrategy() instanceof AllowDuplicates); + Assert.assertFalse(appendOnly.filterExistingRecords()); ingestMode = getAppendOnlyDatetimeAuditingWithFilteringDuplicates(); persistence = getPersistence(ingestMode); @@ -121,7 +121,7 @@ public void testMapperForAppendOnly() throws Exception Assert.assertTrue(appendOnly.auditing() instanceof DateTimeAuditing); dateTimeAuditing = (DateTimeAuditing) appendOnly.auditing(); Assert.assertEquals("AUDIT_TIME", dateTimeAuditing.dateTimeField()); - Assert.assertTrue(appendOnly.deduplicationStrategy() instanceof FilterDuplicates); + Assert.assertTrue(appendOnly.filterExistingRecords()); } @Test diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/append-only/filter_duplicates/persistence_no_audit.txt b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/append-only/filter_duplicates/persistence_no_audit.txt deleted file mode 100644 index 0a2f54a8d73..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/append-only/filter_duplicates/persistence_no_audit.txt +++ /dev/null @@ -1,134 +0,0 @@ -###Pure -Class test::Person -{ - name: String[1]; -} - -Class test::ServiceResult -{ - ID: String[1]; - NAME: String[1]; -} - -###Mapping -Mapping test::Mapping () - -###Relational -Database test::TestDatabase -( - Table personTable - 
( - ID INTEGER PRIMARY KEY, - NAME VARCHAR(100) - ) -) -###Service -Service test::Service -{ - pattern : 'test'; - documentation : 'test'; - autoActivateUpdates: true; - execution: Single - { - query: src: test::Person[1]|$src.name; - mapping: test::Mapping; - runtime: - #{ - connections: []; - }#; - } - test: Single - { - data: 'test'; - asserts: []; - } -} - -###Persistence -Persistence test::TestPersistence -{ - doc: 'This is test documentation.'; - trigger: Manual; - service: test::Service; - persister: Batch - { - sink: Relational - { - database: test::TestDatabase; - } - ingestMode: AppendOnly - { - auditing: None; - filterDuplicates: true; - } - targetShape: Flat - { - targetName: 'personTable'; - modelClass: test::ServiceResult; - } - } - tests: - [ - test1: - { - testBatches: - [ - testBatch1: - { - data: - { - connection: - { - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":1, "NAME":"ANDY"},{"ID":2, "NAME":"BRAD"}]'; - }# - } - } - asserts: - [ - assert1: - EqualToJson - #{ - expected: - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":1, "NAME":"ANDY"},{"ID":2, "NAME":"BRAD"}]'; - }#; - }# - ] - }, - testBatch2: - { - data: - { - connection: - { - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":2, "NAME":"BRAD"},{"ID":3, "NAME":"CATHY"}]'; - }# - } - } - asserts: - [ - assert1: - EqualToJson - #{ - expected: - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":1, "NAME":"ANDY"},{"ID":2, "NAME":"BRAD"},{"ID":3, "NAME":"CATHY"}]'; - }#; - }# - ] - } - ] - isTestDataFromServiceOutput: true; - } - ] -} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/v2/append-only/filter_duplicates/persistence_date_time_auditing.txt b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/v2/append-only/filter_duplicates/persistence_date_time_auditing.txt index 
4c90c627db5..fc50f93b78e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/v2/append-only/filter_duplicates/persistence_date_time_auditing.txt +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/v2/append-only/filter_duplicates/persistence_date_time_auditing.txt @@ -20,7 +20,7 @@ Database test::TestDatabase ( ID INTEGER PRIMARY KEY, NAME VARCHAR(100), - BATCH_TIME_IN TIMESTAMP + BATCH_TIME_IN TIMESTAMP PRIMARY KEY ) ) ###Service diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/v2/append-only/filter_duplicates/persistence_no_audit.txt b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/v2/append-only/filter_duplicates/persistence_no_audit.txt deleted file mode 100644 index 58068e34a3c..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/resources/v2/append-only/filter_duplicates/persistence_no_audit.txt +++ /dev/null @@ -1,143 +0,0 @@ -###Pure -Class test::Person -{ - name: String[1]; -} - -Class test::ServiceResult -{ - ID: String[1]; - NAME: String[1]; -} - -###Mapping -Mapping test::Mapping () - -###Relational -Database test::TestDatabase -( - Table personTable - ( - ID INTEGER PRIMARY KEY, - NAME VARCHAR(100) - ) -) -###Service -Service test::Service -{ - pattern : 'test'; - documentation : 'test'; - autoActivateUpdates: true; - execution: Single - { - query: src: test::Person[1]|$src.name; - mapping: test::Mapping; - runtime: - #{ - connections: []; - }#; - } - test: Single - { - data: 'test'; - asserts: []; - } -} - -###Persistence -Persistence test::TestPersistence -{ - doc: 'This is test documentation.'; - trigger: Manual; - service: test::Service; - serviceOutputTargets: - [ - TDS - { - keys: [ID] - deduplication: None; - datasetType: Delta - { - actionIndicator: None; - } - } - -> - Relational - #{ - table: personTable; - 
database: test::TestDatabase; - temporality: None - { - auditing: None; - updatesHandling: AppendOnly - { - appendStrategy: FilterDuplicates; - } - } - }# - ]; - tests: - [ - test1: - { - testBatches: - [ - testBatch1: - { - data: - { - connection: - { - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":1, "NAME":"ANDY"},{"ID":2, "NAME":"BRAD"}]'; - }# - } - } - asserts: - [ - assert1: - EqualToJson - #{ - expected: - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":1, "NAME":"ANDY"},{"ID":2, "NAME":"BRAD"}]'; - }#; - }# - ] - }, - testBatch2: - { - data: - { - connection: - { - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":2, "NAME":"BRAD"},{"ID":3, "NAME":"CATHY"}]'; - }# - } - } - asserts: - [ - assert1: - EqualToJson - #{ - expected: - ExternalFormat - #{ - contentType: 'application/json'; - data: '[{"ID":1, "NAME":"ANDY"},{"ID":2, "NAME":"BRAD"},{"ID":3, "NAME":"CATHY"}]'; - }#; - }# - ] - } - ] - isTestDataFromServiceOutput: true; - } - ] -} \ No newline at end of file