From 453f3efc707fd9d006150271c517e681754ae56c Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Fri, 6 Oct 2023 15:10:38 +0800 Subject: [PATCH] Fix the sqls for dedup and versioning and add H2 tests --- .../components/util/LogicalPlanUtils.java | 3 +- .../nontemporal/NontemporalSnapshotTest.java | 6 +- .../versioning/TestDedupAndVersioning.java | 73 ++++++++++--------- .../data_expected_with_data_splits.csv | 7 ++ .../data_expected_with_duplicates.csv | 6 ++ .../data_expected_with_max_version.csv | 5 ++ ...cted_without_dups_and_with_data_splits.csv | 6 ++ 7 files changed, 66 insertions(+), 40 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_data_splits.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_duplicates.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_max_version.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_without_dups_and_with_data_splits.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index 9d2a948a6a9..c8f91cd30ff 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -460,7 +460,8 @@ public static Dataset getTempStagingDatasetDefinition(Dataset stagingDataset, In public static Dataset getDedupedAndVersionedDataset(DeduplicationStrategy deduplicationStrategy, VersioningStrategy versioningStrategy, Dataset stagingDataset, List primaryKeys) { Dataset dedupedDataset = deduplicationStrategy.accept(new DatasetDeduplicationHandler(stagingDataset)); - if (dedupedDataset instanceof Selection) + boolean isTempTableNeededForVersioning = versioningStrategy.accept(VersioningVisitors.IS_TEMP_TABLE_NEEDED); + if (isTempTableNeededForVersioning && dedupedDataset instanceof Selection) { Selection selection = (Selection) dedupedDataset; dedupedDataset = selection.withAlias(stagingDataset.datasetReference().alias()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index 00ba03c3e5b..e0fdbc0ec8a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -86,9 +86,9 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato String cleanupTempStagingTableSql = "DELETE FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage"; String insertTempStagingTableSql = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + "(\"id\", \"name\", \"amount\", \"biz_date\", \"legend_persistence_count\") " + - "((SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",COUNT(*) as \"legend_persistence_count\" " + - "FROM \"mydb\".\"staging\" as stage GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\") as stage)"; - + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + + "COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\")"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableWithAuditPkCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(createTempStagingTable, preActionsSqlList.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java index 5acdb58ba4f..5e02d54d484 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java @@ -30,9 +30,11 @@ import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import static org.finos.legend.engine.persistence.components.TestUtils.*; +import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_STAGING_DATASET_BASE_NAME; public class TestDedupAndVersioning extends BaseTest { @@ -89,16 +91,10 @@ void testNoDedupNoVersioning() throws Exception IngestMode ingestMode = NontemporalSnapshot.builder() .auditing(NoAuditing.builder().build()) .build(); - RelationalIngestor ingestor = RelationalIngestor.builder() - .ingestMode(ingestMode) - .relationalSink(H2Sink.get()) - .build(); - - // Create staging Table without PKS and load Data into it - Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); - datasets = ingestor.create(datasets); - datasets = ingestor.dedupAndVersion(datasets); + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + Assertions.assertEquals(false, h2Sink.doesTableExist(getTempStagingDataset())); } @Test @@ -112,17 +108,12 @@ void testFilterDupsNoVersioning() throws Exception .auditing(NoAuditing.builder().build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .build(); - RelationalIngestor ingestor = RelationalIngestor.builder() - .ingestMode(ingestMode) - .relationalSink(H2Sink.get()) - .build(); - - // Create staging Table without PKS and load Data into it createStagingTableWithoutVersion(); + // TODO LOAD DATA - Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); - datasets = ingestor.create(datasets); - datasets = ingestor.dedupAndVersion(datasets); + performDedupAndVersioining(datasets, ingestMode); + // Validate tempTableExists + Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset())); } @@ -138,17 +129,14 @@ void testFilterDupsMaxVersion() throws Exception .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(MaxVersionStrategy.builder().versioningField("version").build()) .build(); - RelationalIngestor ingestor = RelationalIngestor.builder() - .ingestMode(ingestMode) - .relationalSink(H2Sink.get()) - .build(); - // Create staging Table without PKS and load Data into it createStagingTableWithVersion(); + // TODO LOAD DATA - Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); - datasets = ingestor.create(datasets); - datasets = ingestor.dedupAndVersion(datasets); + performDedupAndVersioining(datasets, ingestMode); + + // Validate tempTableExists + Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset())); } @Test @@ -164,17 +152,14 @@ void testFilterDupsAllVersion() throws Exception .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder().versioningField("version").build()) .build(); - RelationalIngestor ingestor = RelationalIngestor.builder() - .ingestMode(ingestMode) - .relationalSink(H2Sink.get()) - .build(); - // Create staging Table without PKS and load Data into it createStagingTableWithVersion(); + // TODO LOAD DATA - Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); - datasets = ingestor.create(datasets); - datasets = ingestor.dedupAndVersion(datasets); + performDedupAndVersioining(datasets, ingestMode); + + // Validate tempTableExists + Assertions.assertEquals(true, h2Sink.doesTableExist(getTempStagingDataset())); } private DatasetDefinition getStagingTableWithoutVersion() @@ -186,6 +171,14 @@ private DatasetDefinition getStagingTableWithoutVersion() .build(); } + private Dataset getTempStagingDataset() + { + return DatasetReferenceImpl.builder() + .group(testSchemaName) + .name(stagingTableName + "_" + TEMP_STAGING_DATASET_BASE_NAME) + .build(); + } + private DatasetDefinition getStagingTableWithVersion() { return DatasetDefinition.builder() @@ -219,7 +212,15 @@ private void createStagingTableWithVersion() h2Sink.executeStatement(createSql); } + private static void performDedupAndVersioining(Datasets datasets, IngestMode ingestMode) { + RelationalIngestor ingestor = RelationalIngestor.builder() + .ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .build(); - - + Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); + datasets = ingestor.create(datasets); + datasets = ingestor.dedupAndVersion(datasets); } + +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_data_splits.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_data_splits.csv new file mode 100644 index 00000000000..4ec38cb111f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_data_splits.csv @@ -0,0 +1,7 @@ +1,Andy,1,1000,2012-01-01,digest1,1 +1,Andy,2,2000,2012-01-02,digest2,2 +1,Andy,3,3000,2012-01-03,digest3,3 +2,Becky,1,4000,2012-01-04,digest4,1 +2,Becky,1,4000,2012-01-04,digest4,1 +3,Cathy,1,5000,2012-01-05,digest5,1 +3,Cathy,1,6000,2012-01-06,digest6,1 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_duplicates.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_duplicates.csv new file mode 100644 index 00000000000..5d2b28c46a9 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_duplicates.csv @@ -0,0 +1,6 @@ +1,Andy,1000,2012-01-01,digest1 +1,Andy,1000,2012-01-01,digest1 +1,Andy,1000,2012-01-01,digest1 +2,Becky,2000,2012-01-02,digest2 +2,Becky,2000,2012-01-02,digest2 +3,Cathy,3000,2012-01-03,digest3 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_max_version.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_max_version.csv new file mode 100644 index 00000000000..4f351fc2dda --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_with_max_version.csv @@ -0,0 +1,5 @@ +1,Andy,3,3000,2012-01-03,digest3 +2,Becky,1,4000,2012-01-04,digest4 +2,Becky,1,4000,2012-01-04,digest4 +3,Cathy,1,5000,2012-01-05,digest5 +3,Cathy,1,6000,2012-01-06,digest6 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_without_dups_and_with_data_splits.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_without_dups_and_with_data_splits.csv new file mode 100644 index 00000000000..0ccd0abe9a4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/dedup-and-versioning/expected/data_expected_without_dups_and_with_data_splits.csv @@ -0,0 +1,6 @@ +1,Andy,1,1000,2012-01-01,digest1,1,1 +1,Andy,2,2000,2012-01-02,digest2,1,2 +1,Andy,3,3000,2012-01-03,digest3,1,3 +2,Becky,1,4000,2012-01-04,digest4,2,1 +3,Cathy,1,5000,2012-01-05,digest5,1,1 +3,Cathy,1,6000,2012-01-06,digest6,1,1 \ No newline at end of file