From 27c758336440e613ee616cfeeaa225d32609bf6c Mon Sep 17 00:00:00 2001 From: Zhang Lizhi Date: Mon, 4 Dec 2023 21:46:06 +0800 Subject: [PATCH] Persistence Component: Append Only Digest Generation and Other Fixes (#2451) --- .../components/common/FileFormatType.java | 23 ++ .../ingestmode/AppendOnlyAbstract.java | 10 +- .../DeriveMainDatasetSchemaFromStaging.java | 11 +- .../ingestmode/IngestModeCaseConverter.java | 12 +- .../ingestmode/IngestModeVisitors.java | 19 +- .../digest/DigestGenStrategyVisitor.java | 2 + .../digest/DigestGenerationHandler.java | 67 +++++ ...UserProvidedDigestGenStrategyAbstract.java | 36 +++ .../DatasetAdditionalPropertiesAbstract.java | 2 +- .../datasets/DatasetCaseConverter.java | 18 ++ .../datasets/FilteredDatasetAbstract.java | 31 ++ .../datasets/IcebergPropertiesAbstract.java} | 28 +- .../datasets/SchemaDefinitionAbstract.java | 8 + .../StagedFilesDatasetProperties.java | 19 +- .../logicalplan/operations/CopyAbstract.java | 5 + .../logicalplan/values/FunctionName.java | 3 +- .../components/planner/AppendOnlyPlanner.java | 34 ++- .../components/planner/BulkLoadPlanner.java | 72 ++--- .../components/planner/Planner.java | 14 +- .../relational/ansi/AnsiSqlSink.java | 3 + .../ansi/optimizer/StringCaseOptimizer.java | 2 +- .../DatasetAdditionalPropertiesVisitor.java | 21 +- .../sql/visitors/DerivedDatasetVisitor.java | 1 - .../sql/visitors/FilteredDatasetVisitor.java | 43 +++ .../ansi/sql/visitors/SelectionVisitor.java | 8 + .../ansi/sql/visitors/ShowVisitor.java | 15 +- .../nontemporal/AppendOnlyTest.java | 26 ++ .../nontemporal/NontemporalDeltaTest.java | 30 ++ .../UnitemporalDeltaBatchIdBasedTest.java | 76 +++++ .../relational/bigquery/BigQuerySink.java | 3 + .../bigquery/executor/BigQueryHelper.java | 18 +- ...yStagedFilesDatasetPropertiesAbstract.java | 10 +- .../sql/visitor/DatetimeValueVisitor.java | 2 +- .../bigquery/sql/visitor/SQLDropVisitor.java | 47 +++ .../StagedFilesDatasetReferenceVisitor.java | 40 +-- 
.../expressions/table/StagedFilesTable.java | 3 +- .../schemaops/statements/DropTable.java | 85 ++++++ .../e2e/AppendOnlyExecutorTest.java | 70 ++++- .../e2e/AppendOnlyGeneratorTest.java | 67 ++++- .../components/e2e/BigQueryEndToEndTest.java | 8 +- .../components/e2e/BulkLoadExecutorTest.java | 22 +- .../components/e2e/BulkLoadGeneratorTest.java | 8 +- .../components/ingestmode/AppendOnlyTest.java | 101 ++++++- .../ingestmode/BigQueryTestArtifacts.java | 44 ++- ...eltaSourceSpecifiesFromAndThroughTest.java | 20 +- ...itemporalDeltaSourceSpecifiesFromTest.java | 57 ++-- .../components/ingestmode/BulkLoadTest.java | 81 +++--- .../components/ingestmode/IngestModeTest.java | 4 +- .../ingestmode/NontemporalDeltaTest.java | 40 ++- .../ingestmode/NontemporalSnapshotTest.java | 4 +- .../UnitemporalDeltaBatchIdBasedTest.java | 93 ++++++ ...temporalDeltaBatchIdDateTimeBasedTest.java | 44 +-- .../UnitemporalDeltaDateTimeBasedTest.java | 56 ++-- ...poralSnapshotBatchIdDateTimeBasedTest.java | 34 +-- .../UnitemporalSnapshotDateTimeBasedTest.java | 52 ++-- .../transformer/PlaceholderTest.java | 6 +- .../BulkLoadDatasetUtilsBigQueryTest.java | 4 +- .../append/digest_generation/data_pass1.csv | 3 + .../append/digest_generation/data_pass2.csv | 6 + .../append/digest_generation/data_pass3.csv | 3 + .../append/digest_generation/data_pass4.csv | 6 + .../input/digest_generation/data_pass1.csv | 3 + .../input/digest_generation/data_pass2.csv | 3 + .../input/digest_generation/data_pass3.csv | 3 + .../input/digest_generation/data_pass4.csv | 3 + .../api/RelationalGeneratorAbstract.java | 4 +- .../api/RelationalIngestorAbstract.java | 6 +- .../sqldom/common/FunctionName.java | 3 +- .../schemaops/statements/DeleteStatement.java | 6 +- .../schemaops/statements/ShowCommand.java | 22 +- .../sqldom/schemaops/ShowCommandTest.java | 8 +- ...2StagedFilesDatasetPropertiesAbstract.java | 12 +- .../h2/sql/visitor/DigestUdfVisitor.java | 19 +- .../StagedFilesDatasetReferenceVisitor.java | 2 +- 
.../sql/visitor/ToArrayFunctionVisitor.java | 15 +- .../persistence/components/BaseTest.java | 10 + .../persistence/components/TestUtils.java | 190 +++++++++++++ .../BitemporalDeltaWithBatchIdTest.java | 54 ++++ .../ingestmode/bulkload/BulkLoadTest.java | 64 ++--- .../nontemporal/AppendOnlyTest.java | 256 ++++++++++++++++- .../nontemporal/NontemporalDeltaTest.java | 80 +++++- .../nontemporal/NontemporalSnapshotTest.java | 45 ++- .../unitemporal/UnitemporalDeltaTest.java | 60 ++++ .../unitemporal/UnitemporalSnapshotTest.java | 65 +++++ .../operations/SchemaEvolutionTest.java | 20 +- .../versioning/TestDedupAndVersioning.java | 15 +- .../expected_pass1.csv | 3 + .../expected_pass2.csv | 12 + .../staging_data_pass1.csv | 5 + .../staging_data_pass2.csv | 10 + .../digest_generation/expected_pass1.csv | 3 + .../digest_generation/expected_pass2.csv | 6 + .../digest_generation2/expected_pass1.csv | 4 + .../digest_generation2/expected_pass2.csv | 7 + .../with_staging_filter/expected_pass1.csv | 2 + .../with_staging_filter/expected_pass2.csv | 4 + .../input/digest_generation/data_pass1.csv | 3 + .../input/digest_generation/data_pass2.csv | 3 + .../input/digest_generation2/data_pass1.csv | 4 + .../input/digest_generation2/data_pass2.csv | 4 + .../input/with_staging_filter/data_pass1.csv | 4 + .../input/with_staging_filter/data_pass2.csv | 3 + .../with_staging_filter/expected_pass1.csv | 5 + .../with_staging_filter/expected_pass2.csv | 3 + .../input/with_staging_filter/data_pass1.csv | 8 + .../input/with_staging_filter/data_pass2.csv | 8 + .../with_staging_filter/expected_pass1.csv | 4 + .../with_staging_filter/expected_pass2.csv | 5 + .../with_staging_filter/expected_pass3.csv | 5 + .../staging_data_pass1.csv | 9 + .../staging_data_pass2.csv | 4 + .../memsql/optimizer/StringCaseOptimizer.java | 4 +- .../memsql/sql/visitor/SQLCreateVisitor.java | 3 +- .../sql/visitor/SchemaDefinitionVisitor.java | 6 +- .../memsql/sql/visitor/ShowVisitor.java | 33 +-- 
.../schemaops/statements/ShowCommand.java | 38 +-- .../components/ingestmode/AppendOnlyTest.java | 25 ++ ...itemporalDeltaSourceSpecifiesFromTest.java | 20 +- .../ingestmode/MemsqlTestArtifacts.java | 269 +++++++++--------- .../ingestmode/NontemporalDeltaTest.java | 30 ++ .../UnitemporalDeltaBatchIdBasedTest.java | 94 +++++- ...temporalDeltaBatchIdDateTimeBasedTest.java | 6 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 2 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 2 +- .../logicalplan/operations/BaseTestUtils.java | 93 +++--- .../logicalplan/operations/CreateTest.java | 81 ++++-- .../logicalplan/operations/ShowTest.java | 12 +- .../logicalplan/datasets}/FileFormat.java | 9 +- ...eStagedFilesDatasetPropertiesAbstract.java | 6 +- .../datasets/StandardFileFormatAbstract.java | 35 +++ .../UserDefinedFileFormatAbstract.java} | 29 +- .../optmizer/StringCaseOptimizer.java | 4 +- .../snowflake/sql/visitor/CopyVisitor.java | 42 ++- .../DatasetAdditionalPropertiesVisitor.java | 10 +- .../snowflake/sql/visitor/ShowVisitor.java | 32 +-- .../StagedFilesDatasetReferenceVisitor.java | 2 - .../sqldom/common/IcebergProperties.java | 54 ++++ .../expressions/table/StagedFilesTable.java | 33 --- .../schemaops/statements/CopyStatement.java | 124 +++++++- .../schemaops/statements/CreateTable.java | 28 +- .../schemaops/statements/ShowCommand.java | 29 +- .../components/SnowflakeTestArtifacts.java | 14 + .../components/ingestmode/AppendOnlyTest.java | 103 +++++++ .../components/ingestmode/BulkLoadTest.java | 183 ++++++++++-- .../ingestmode/NontemporalDeltaMergeTest.java | 31 ++ .../operations/CreateTableTest.java | 12 +- .../logicalplan/operations/ShowTest.java | 6 +- .../sqldom/schemaops/CopyStatementTest.java | 38 ++- .../persistence/components/BaseTest.java | 51 ++++ .../scenarios/AppendOnlyScenarios.java | 51 +++- .../scenarios/NonTemporalDeltaScenarios.java | 9 + ...UnitemporalDeltaBatchIdBasedScenarios.java | 27 ++ .../nontemporal/AppendOnlyTestCases.java | 37 ++- 
.../NontemporalDeltaTestCases.java | 20 ++ .../AppendOnlyBasedDerivationTest.java | 10 +- ...nitmemporalDeltaBatchIdBasedTestCases.java | 39 ++- .../persistence/mapper/AppendOnlyMapper.java | 5 +- .../persistence/mapper/v1/DeriveDatasets.java | 5 +- .../mapper/v1/IngestModeVisitors.java | 5 +- .../mapper/v2/IngestModeMapper.java | 25 +- .../TestAppendOnlyWithAllowDuplicates.java | 4 +- .../mapper/IngestModeMapperTest.java | 13 +- .../mapper/TestFieldsToExclude.java | 3 +- 163 files changed, 3633 insertions(+), 989 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatType.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenerationHandler.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UserProvidedDigestGenStrategyAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/FilteredDatasetAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/{common/LoadOptionsAbstract.java => logicalplan/datasets/IcebergPropertiesAbstract.java} (62%) create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/FilteredDatasetVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/SQLDropVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/DropTable.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass3.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass4.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass1.csv create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass3.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass4.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass2.csv create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass2.csv create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass2.csv create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass3.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass2.csv rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/{legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common => legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets}/FileFormat.java (82%) create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/StandardFileFormatAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/{sqldom/common/ExternalVolume.java => logicalplan/datasets/UserDefinedFileFormatAbstract.java} (59%) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/common/IcebergProperties.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatType.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatType.java new file mode 100644 index 00000000000..f5242a06056 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatType.java @@ -0,0 +1,23 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.common; + +public enum FileFormatType +{ + CSV, + JSON, + AVRO, + PARQUET; +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java index bfc0302b75e..e882a4899e3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyAbstract.java @@ -15,10 +15,10 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.ingestmode.audit.Auditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; import org.immutables.value.Value; -import java.util.Optional; - import static org.immutables.value.Value.Immutable; import static 
org.immutables.value.Value.Style; @@ -32,7 +32,11 @@ ) public interface AppendOnlyAbstract extends IngestMode { - Optional digestField(); + @Value.Default + default DigestGenStrategy digestGenStrategy() + { + return NoDigestGenStrategy.builder().build(); + } Auditing auditing(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index b6e12b71adf..6253925d4d1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -70,10 +70,7 @@ public Dataset visitAppendOnly(AppendOnlyAbstract appendOnly) { boolean isAuditingFieldPK = doesDatasetContainsAnyPK(mainSchemaFields); appendOnly.auditing().accept(new EnrichSchemaWithAuditing(mainSchemaFields, isAuditingFieldPK)); - if (appendOnly.digestField().isPresent()) - { - addDigestField(mainSchemaFields, appendOnly.digestField().get()); - } + appendOnly.digestGenStrategy().accept(IngestModeVisitors.EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY).ifPresent(digest -> addDigestField(mainSchemaFields, digest)); removeDataSplitField(appendOnly.dataSplitField()); return mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.addAllFields(mainSchemaFields).build()).build(); } @@ 
-138,11 +135,7 @@ public Dataset visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) @Override public Dataset visitBulkLoad(BulkLoadAbstract bulkLoad) { - Optional digestField = bulkLoad.digestGenStrategy().accept(IngestModeVisitors.EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY); - if (digestField.isPresent()) - { - addDigestField(mainSchemaFields, digestField.get()); - } + bulkLoad.digestGenStrategy().accept(IngestModeVisitors.EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY).ifPresent(digest -> addDigestField(mainSchemaFields, digest)); Field batchIdField = Field.builder() .name(bulkLoad.batchIdField()) .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java index 220b55532bb..4edb15d29de 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java @@ -20,6 +20,8 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditingAbstract; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitor; +import 
org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.versioning.*; import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategy; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; @@ -75,7 +77,7 @@ public IngestMode visitAppendOnly(AppendOnlyAbstract appendOnly) { return AppendOnly .builder() - .digestField(applyCase(appendOnly.digestField())) + .digestGenStrategy(appendOnly.digestGenStrategy().accept(new DigestGenStrategyCaseConverter())) .auditing(appendOnly.auditing().accept(new AuditingCaseConverter())) .deduplicationStrategy(appendOnly.deduplicationStrategy()) .versioningStrategy(appendOnly.versioningStrategy().accept(new VersionStrategyCaseConverter())) @@ -244,6 +246,14 @@ public DigestGenStrategy visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrateg .digestField(applyCase(udfBasedDigestGenStrategy.digestField())) .build(); } + + @Override + public DigestGenStrategy visitUserProvidedDigestGenStrategy(UserProvidedDigestGenStrategyAbstract userProvidedDigestGenStrategy) + { + return UserProvidedDigestGenStrategy.builder() + .digestField(applyCase(userProvidedDigestGenStrategy.digestField())) + .build(); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java index 0a255978ace..64f9db016e6 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java @@ -18,6 +18,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategyVisitor; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitors; import java.util.Collections; @@ -37,7 +38,7 @@ private IngestModeVisitors() @Override public Boolean visitAppendOnly(AppendOnlyAbstract appendOnly) { - return appendOnly.filterExistingRecords(); + return appendOnly.digestGenStrategy().accept(DIGEST_GEN_STRATEGY_DIGEST_REQUIRED); } @Override @@ -88,7 +89,7 @@ public Boolean visitBulkLoad(BulkLoadAbstract bulkLoad) @Override public Optional visitAppendOnly(AppendOnlyAbstract appendOnly) { - return appendOnly.digestField(); + return appendOnly.digestGenStrategy().accept(EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY); } @Override @@ -140,7 +141,7 @@ public Optional visitBulkLoad(BulkLoadAbstract bulkLoad) public Set visitAppendOnly(AppendOnlyAbstract appendOnly) { Set metaFields = new HashSet<>(); - appendOnly.digestField().ifPresent(metaFields::add); + appendOnly.digestGenStrategy().accept(EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY).ifPresent(metaFields::add); appendOnly.dataSplitField().ifPresent(metaFields::add); return metaFields; } @@ -374,6 +375,12 @@ 
public Boolean visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract { return true; } + + @Override + public Boolean visitUserProvidedDigestGenStrategy(UserProvidedDigestGenStrategyAbstract userProvidedDigestGenStrategy) + { + return true; + } }; public static final DigestGenStrategyVisitor> EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY = new DigestGenStrategyVisitor>() @@ -389,6 +396,12 @@ public Optional visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategy { return Optional.of(udfBasedDigestGenStrategy.digestField()); } + + @Override + public Optional visitUserProvidedDigestGenStrategy(UserProvidedDigestGenStrategyAbstract userProvidedDigestGenStrategy) + { + return Optional.of(userProvidedDigestGenStrategy.digestField()); + } }; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java index a45a2e91735..37f5e8ab63a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java @@ -19,4 +19,6 @@ public interface DigestGenStrategyVisitor T visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStrategy); T visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy); + + T 
visitUserProvidedDigestGenStrategy(UserProvidedDigestGenStrategyAbstract userProvidedDigestGenStrategy); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenerationHandler.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenerationHandler.java new file mode 100644 index 00000000000..73a8e592d85 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenerationHandler.java @@ -0,0 +1,67 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.digest; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; + +import java.util.List; +import java.util.stream.Collectors; + +public class DigestGenerationHandler implements DigestGenStrategyVisitor +{ + private List fieldsToSelect; + private List fieldsToInsert; + private Dataset stagingDataset; + private Dataset mainDataset; + + public DigestGenerationHandler(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert) + { + this.mainDataset = mainDataset; + this.stagingDataset = stagingDataset; + this.fieldsToSelect = fieldsToSelect; + this.fieldsToInsert = fieldsToInsert; + } + + @Override + public Void visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStrategy) + { + return null; + } + + @Override + public Void visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) + { + Value digestValue = DigestUdf + .builder() + .udfName(udfBasedDigestGenStrategy.digestUdfName()) + .addAllFieldNames(stagingDataset.schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) + .addAllValues(fieldsToSelect) + .dataset(stagingDataset) + .build(); + String digestField = udfBasedDigestGenStrategy.digestField(); + fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(digestField).build()); + fieldsToSelect.add(digestValue); + return null; + } + + @Override + public Void visitUserProvidedDigestGenStrategy(UserProvidedDigestGenStrategyAbstract userProvidedDigestGenStrategy) + { + return null; + } +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UserProvidedDigestGenStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UserProvidedDigestGenStrategyAbstract.java new file mode 100644 index 00000000000..79577b91aee --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UserProvidedDigestGenStrategyAbstract.java @@ -0,0 +1,36 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.digest; + +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface UserProvidedDigestGenStrategyAbstract extends DigestGenStrategy +{ + String digestField(); + + @Override + default T accept(DigestGenStrategyVisitor visitor) + { + return visitor.visitUserProvidedDigestGenStrategy(this); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetAdditionalPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetAdditionalPropertiesAbstract.java index 3592f7e8c08..7959a0309df 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetAdditionalPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetAdditionalPropertiesAbstract.java @@ -35,7 +35,7 @@ public interface DatasetAdditionalPropertiesAbstract extends LogicalPlanNode Optional tableOrigin(); - Optional externalVolume(); + Optional icebergProperties(); Map tags(); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java index bf3415061b5..28d5d27cba6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java @@ -119,6 +119,24 @@ public Dataset applyCaseOnDataset(Dataset dataset, Function stra return derivedDataset; } + if (dataset instanceof FilteredDataset) + { + FilteredDataset filteredDataset = FilteredDataset.builder() + .name(newName.orElseThrow(IllegalStateException::new)) + .group(newSchemaName) + .database(newDatabaseName) + .schema(schemaDefinition) + .filter(((FilteredDataset) dataset).filter()) + .datasetAdditionalProperties(dataset.datasetAdditionalProperties()) + .build(); + + if (dataset.datasetReference().alias().isPresent()) + { + filteredDataset = filteredDataset.withAlias(dataset.datasetReference().alias().get()); + } + return filteredDataset; + } + if (dataset instanceof StagedFilesDataset) { StagedFilesDataset stagedFilesDataset = StagedFilesDataset.builder() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/FilteredDatasetAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/FilteredDatasetAbstract.java new file mode 100644 index 00000000000..28d4e193365 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/FilteredDatasetAbstract.java @@ -0,0 +1,31 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.logicalplan.datasets; + +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface FilteredDatasetAbstract extends DatasetDefinitionAbstract +{ + Condition filter(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/LoadOptionsAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/IcebergPropertiesAbstract.java similarity index 62% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/LoadOptionsAbstract.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/IcebergPropertiesAbstract.java index c299b0b7aa5..3a16a681692 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/LoadOptionsAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/IcebergPropertiesAbstract.java @@ -12,33 +12,25 @@ // See the License for the specific language 
governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.common; +package org.finos.legend.engine.persistence.components.logicalplan.datasets; -import org.immutables.value.Value; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.immutables.value.Value.Immutable; +import org.immutables.value.Value.Style; -import java.util.Optional; - -@Value.Immutable -@Value.Style( +@Immutable +@Style( typeAbstract = "*Abstract", typeImmutable = "*", jdkOnly = true, optionalAcceptNullable = true, strictBuilder = true ) -public interface LoadOptionsAbstract +public interface IcebergPropertiesAbstract extends LogicalPlanNode { - Optional fieldDelimiter(); - - Optional encoding(); - - Optional nullMarker(); - - Optional quote(); - - Optional skipLeadingRows(); + String catalog(); - Optional maxBadRecords(); + String externalVolume(); - Optional compression(); + String baseLocation(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SchemaDefinitionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SchemaDefinitionAbstract.java index 549b6f4306d..21539cd38a9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SchemaDefinitionAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/SchemaDefinitionAbstract.java @@ -14,6 +14,7 @@ 
package org.finos.legend.engine.persistence.components.logicalplan.datasets; +import org.immutables.value.Value; import org.immutables.value.Value.Immutable; import org.immutables.value.Value.Style; @@ -68,5 +69,12 @@ interface ColumnStoreSpecificationAbstract interface ShardSpecificationAbstract { List shardKeys(); + + // Keyless sharding can create a sharded table even without explicit shard keys + @Value.Default + default boolean isSharded() + { + return true; + } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java index c2344edfb45..8dae01e0dc5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java @@ -14,9 +14,26 @@ package org.finos.legend.engine.persistence.components.logicalplan.datasets; +import org.immutables.value.Value; + import java.util.List; public interface StagedFilesDatasetProperties { - List files(); + List filePaths(); + + List filePatterns(); + + @Value.Check + default void validate() + { + if (filePatterns().size() > 0 && filePaths().size() > 0) + { + throw new IllegalArgumentException("Cannot build StagedFilesDatasetProperties, Only one out of filePatterns and filePaths should be 
provided"); + } + if (filePatterns().size() == 0 && filePaths().size() == 0) + { + throw new IllegalArgumentException("Cannot build StagedFilesDatasetProperties, Either one of filePatterns and filePaths must be provided"); + } + } } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java index 7c0436a9e49..82e5876ed42 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CopyAbstract.java @@ -15,9 +15,11 @@ package org.finos.legend.engine.persistence.components.logicalplan.operations; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import java.util.List; + import static org.immutables.value.Value.Immutable; import static org.immutables.value.Value.Parameter; import static org.immutables.value.Value.Style; @@ -40,4 +42,7 @@ public interface CopyAbstract extends Operation @Parameter(order = 2) List fields(); + + @Parameter(order = 3) + StagedFilesDatasetProperties stagedFilesDatasetProperties(); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java index ac3a3e047fe..7dc3b69359a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java @@ -40,5 +40,6 @@ public enum FunctionName PARSE_DATETIME, OBJECT_CONSTRUCT, TO_VARIANT, - TO_JSON; + TO_JSON, + CONVERT; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java index c2227940d49..7919b249436 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java @@ -22,6 +22,10 @@ import 
org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditingAbstract; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditingAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenerationHandler; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; @@ -53,11 +57,23 @@ class AppendOnlyPlanner extends Planner { private final Optional dataSplitInRangeCondition; + private final Optional userProvidedDigest; AppendOnlyPlanner(Datasets datasets, AppendOnly ingestMode, PlannerOptions plannerOptions, Set capabilities) { super(datasets, ingestMode, plannerOptions, capabilities); + // Retrieve user provided digest if present + DigestGenStrategy digestGenStrategy = ingestMode().digestGenStrategy(); + if (digestGenStrategy instanceof UserProvidedDigestGenStrategy) + { + userProvidedDigest = Optional.of(((UserProvidedDigestGenStrategy) digestGenStrategy).digestField()); + } + else + { + userProvidedDigest = Optional.empty(); + } + // Validation // 1. If primary keys are present, then auditing must be turned on and the auditing column must be one of the primary keys if (!primaryKeys.isEmpty()) @@ -65,10 +81,10 @@ class AppendOnlyPlanner extends Planner ingestMode.auditing().accept(new ValidateAuditingForPrimaryKeys(mainDataset())); } - // 2. For filterExistingRecords, we must have digest and primary keys to filter them + // 2. 
For filterExistingRecords, we must have (user provided) digest and primary keys to filter them if (ingestMode.filterExistingRecords()) { - if (!ingestMode.digestField().isPresent() || primaryKeys.isEmpty()) + if (!userProvidedDigest.isPresent() || primaryKeys.isEmpty()) { throw new IllegalStateException("Primary keys and digest are mandatory for filterExistingRecords"); } @@ -90,6 +106,10 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) List fieldsToSelect = new ArrayList<>(dataFields); List fieldsToInsert = new ArrayList<>(dataFields); + // Add digest generation (if applicable) + ingestMode().digestGenStrategy().accept(new DigestGenerationHandler(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert)); + + // Add auditing (if applicable) if (ingestMode().auditing().accept(AUDIT_ENABLED)) { BatchStartTimestamp batchStartTimestamp = BatchStartTimestamp.INSTANCE; @@ -98,9 +118,9 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources) String auditField = ingestMode().auditing().accept(AuditingVisitors.EXTRACT_AUDIT_FIELD).orElseThrow(IllegalStateException::new); fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(auditField).build()); } - else if (!ingestMode().dataSplitField().isPresent()) + else if (!ingestMode().dataSplitField().isPresent() && !(ingestMode().digestGenStrategy() instanceof UDFBasedDigestGenStrategy)) { - // this is just to print a "*" when we are in the simplest case (no auditing, no data split) + // this is just to print a "*" when we are in the simplest case (no auditing, no data split, no digest generation) fieldsToSelect = LogicalPlanUtils.ALL_COLUMNS(); } @@ -113,9 +133,9 @@ else if (!ingestMode().dataSplitField().isPresent()) List getDigestOrRemainingColumns() { List remainingCols = new ArrayList<>(); - if (ingestMode().digestField().isPresent()) + if (userProvidedDigest.isPresent()) { - remainingCols = Arrays.asList(ingestMode().digestField().get()); + 
remainingCols = Arrays.asList(userProvidedDigest.get()); } else if (!primaryKeys.isEmpty()) { @@ -143,7 +163,7 @@ private Dataset getSelectStageWithFilterExistingRecords(List fieldsToSele .condition(And.builder() .addConditions( getPrimaryKeyMatchCondition(mainDataset(), stagingDataset(), primaryKeys.toArray(new String[0])), - getDigestMatchCondition(mainDataset(), stagingDataset(), ingestMode().digestField().orElseThrow(IllegalStateException::new))) + getDigestMatchCondition(mainDataset(), stagingDataset(), userProvidedDigest.orElseThrow(IllegalStateException::new))) .build()) .addAllFields(ALL_COLUMNS()) .build())); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 495ca6799f4..db935eec1fb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -21,9 +21,7 @@ import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; -import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategyVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; 
-import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenerationHandler; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -42,7 +40,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; -import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; @@ -65,7 +62,7 @@ class BulkLoadPlanner extends Planner private Dataset tempDataset; private StagedFilesDataset stagedFilesDataset; private BulkLoadMetadataDataset bulkLoadMetadataDataset; - private Optional bulkLoadTaskIdValue; + private Optional bulkLoadEventIdValue; BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions, Set capabilities) { @@ -78,7 +75,7 @@ class BulkLoadPlanner extends Planner throw new IllegalArgumentException("Only StagedFilesDataset are allowed under Bulk Load"); } - bulkLoadTaskIdValue = plannerOptions.bulkLoadTaskIdValue(); + bulkLoadEventIdValue = plannerOptions.bulkLoadEventIdValue(); stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); bulkLoadMetadataDataset = bulkLoadMetadataDataset().orElseThrow(IllegalStateException::new); @@ -129,7 +126,7 @@ private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) List 
fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); // Add digest - ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert)); + ingestMode().digestGenStrategy().accept(new DigestGenerationHandler(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert)); // Add batch_id field fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); @@ -142,7 +139,7 @@ private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) } Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build(); - return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert))); + return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert, stagedFilesDataset.stagedFilesDatasetProperties()))); } private LogicalPlan buildLogicalPlanForCopyAndTransform(Resources resources) @@ -153,7 +150,7 @@ private LogicalPlan buildLogicalPlanForCopyAndTransform(Resources resources) // Operation 1: Copy into a temp table List fieldsToSelectFromStage = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelectFromStage).build(); - operations.add(Copy.of(tempDataset, selectStage, fieldsToSelectFromStage)); + operations.add(Copy.of(tempDataset, selectStage, fieldsToSelectFromStage, stagedFilesDataset.stagedFilesDatasetProperties())); // Operation 2: Transfer from temp table into target table, adding extra columns at the same time @@ -161,7 +158,7 @@ private LogicalPlan buildLogicalPlanForCopyAndTransform(Resources resources) List fieldsToInsertIntoMain = new ArrayList<>(tempDataset.schemaReference().fieldValues()); // Add digest - ingestMode().digestGenStrategy().accept(new 
DigestGeneration(mainDataset(), tempDataset, fieldsToSelect, fieldsToInsertIntoMain)); + ingestMode().digestGenStrategy().accept(new DigestGenerationHandler(mainDataset(), tempDataset, fieldsToSelect, fieldsToInsertIntoMain)); // Add batch_id field fieldsToInsertIntoMain.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); @@ -266,9 +263,18 @@ private Selection getRowsBasedOnAppendTimestamp(Dataset dataset, String field, S private String jsonifyBatchSourceInfo(StagedFilesDatasetProperties stagedFilesDatasetProperties) { Map batchSourceMap = new HashMap(); - List files = stagedFilesDatasetProperties.files(); - batchSourceMap.put("files", files); - bulkLoadTaskIdValue.ifPresent(taskId -> batchSourceMap.put("task_id", taskId)); + List filePaths = stagedFilesDatasetProperties.filePaths(); + List filePatterns = stagedFilesDatasetProperties.filePatterns(); + + if (filePaths != null && !filePaths.isEmpty()) + { + batchSourceMap.put("file_paths", filePaths); + } + if (filePatterns != null && !filePatterns.isEmpty()) + { + batchSourceMap.put("file_patterns", filePatterns); + } + bulkLoadEventIdValue.ifPresent(eventId -> batchSourceMap.put("event_id", eventId)); ObjectMapper objectMapper = new ObjectMapper(); try { @@ -279,44 +285,4 @@ private String jsonifyBatchSourceInfo(StagedFilesDatasetProperties stagedFilesDa throw new RuntimeException(e); } } - - - static class DigestGeneration implements DigestGenStrategyVisitor - { - private List fieldsToSelect; - private List fieldsToInsert; - private Dataset stagingDataset; - private Dataset mainDataset; - - public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert) - { - this.mainDataset = mainDataset; - this.stagingDataset = stagingDataset; - this.fieldsToSelect = fieldsToSelect; - this.fieldsToInsert = fieldsToInsert; - } - - @Override - public Void visitNoDigestGenStrategy(NoDigestGenStrategyAbstract 
noDigestGenStrategy) - { - return null; - } - - @Override - public Void visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) - { - Value digestValue = DigestUdf - .builder() - .udfName(udfBasedDigestGenStrategy.digestUdfName()) - .addAllFieldNames(stagingDataset.schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) - .addAllValues(fieldsToSelect) - .dataset(stagingDataset) - .build(); - String digestField = udfBasedDigestGenStrategy.digestField(); - fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(digestField).build()); - fieldsToSelect.add(digestValue); - return null; - } - } - } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 98b1ecf7a11..e3625c81709 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -32,7 +32,9 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; import org.finos.legend.engine.persistence.components.logicalplan.operations.*; import org.finos.legend.engine.persistence.components.logicalplan.values.*; @@ -107,7 +109,7 @@ default boolean enableConcurrentSafety() return false; } - Optional bulkLoadTaskIdValue(); + Optional bulkLoadEventIdValue(); } private final Datasets datasets; @@ -137,6 +139,8 @@ default boolean enableConcurrentSafety() ingestMode.versioningStrategy().accept(new ValidatePrimaryKeysForVersioningStrategy(primaryKeys, this::validatePrimaryKeysNotEmpty)); // 2. Validate if the versioningField is comparable if a versioningStrategy is present validateVersioningField(ingestMode().versioningStrategy(), stagingDataset()); + // 3. cleanupStagingData must be turned off when using DerivedDataset or FilteredDataset + validateCleanUpStagingData(plannerOptions, originalStagingDataset()); } private Optional getTempStagingDataset() @@ -498,6 +502,14 @@ protected void validateVersioningField(VersioningStrategy versioningStrategy, Da } } + protected void validateCleanUpStagingData(PlannerOptions plannerOptions, Dataset dataset) + { + if (plannerOptions.cleanupStagingData() && (dataset instanceof DerivedDataset || dataset instanceof FilteredDataset)) + { + throw new IllegalStateException("cleanupStagingData cannot be turned on when using DerivedDataset or FilteredDataset"); + } + } + // auditing visitor protected static final AuditEnabled AUDIT_ENABLED = new AuditEnabled(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index f7e3d5e6ac4..3ff13255881 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -38,6 +38,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetReferenceImpl; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Join; import org.finos.legend.engine.persistence.components.logicalplan.datasets.JsonExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; @@ -108,6 +109,7 @@ import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.ExistsConditionVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.FieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.FieldVisitor; +import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.FilteredDatasetVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.FunctionVisitor; import 
org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.GreaterThanEqualToVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.GreaterThanVisitor; @@ -185,6 +187,7 @@ public class AnsiSqlSink extends RelationalSink logicalPlanVisitorByClass.put(DatasetReferenceImpl.class, new DatasetReferenceVisitor()); logicalPlanVisitorByClass.put(DatasetDefinition.class, new DatasetDefinitionVisitor()); logicalPlanVisitorByClass.put(DerivedDataset.class, new DerivedDatasetVisitor()); + logicalPlanVisitorByClass.put(FilteredDataset.class, new FilteredDatasetVisitor()); logicalPlanVisitorByClass.put(JsonExternalDatasetReference.class, new DatasetReferenceVisitor()); logicalPlanVisitorByClass.put(DatasetAdditionalProperties.class, new DatasetAdditionalPropertiesVisitor()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/optimizer/StringCaseOptimizer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/optimizer/StringCaseOptimizer.java index a849683e0bf..ed001f7f44c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/optimizer/StringCaseOptimizer.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/optimizer/StringCaseOptimizer.java @@ -110,7 +110,7 @@ else if (component instanceof UniqueTableConstraint) else if (component instanceof ShowCommand) { ShowCommand command = (ShowCommand) component; 
- command.setSchemaName(applyCase(command.getSchemaName())); + command.setSchemaName(applyCase(command.getSchemaName().orElse(null))); return command; } return component; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DatasetAdditionalPropertiesVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DatasetAdditionalPropertiesVisitor.java index edb8b01b018..73f61d0ba6c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DatasetAdditionalPropertiesVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DatasetAdditionalPropertiesVisitor.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetAdditionalProperties; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.IcebergProperties; import org.finos.legend.engine.persistence.components.logicalplan.datasets.TableOrigin; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.sqldom.tabletypes.CachedTableType; @@ -33,28 +34,30 @@ public class DatasetAdditionalPropertiesVisitor implements LogicalPlanVisitor tags) throw new UnsupportedOperationException("Tags not supported"); } - 
protected void handleExternalVolume(PhysicalPlanNode prev, String s) + protected void handleIcebergProperties(PhysicalPlanNode prev, IcebergProperties icebergProperties) { - throw new UnsupportedOperationException("External Volume not supported"); + throw new UnsupportedOperationException("Iceberg properties not supported"); } private TableType mapTableType(org.finos.legend.engine.persistence.components.logicalplan.datasets.TableType tableType) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DerivedDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DerivedDatasetVisitor.java index b598c89851e..0c2231672bd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DerivedDatasetVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/DerivedDatasetVisitor.java @@ -25,7 +25,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Optional; public class DerivedDatasetVisitor implements LogicalPlanVisitor { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/FilteredDatasetVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/FilteredDatasetVisitor.java new file mode 100644 index 00000000000..f7fec184a9e --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/FilteredDatasetVisitor.java @@ -0,0 +1,43 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors; + +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class FilteredDatasetVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, FilteredDataset current, VisitorContext context) + { + Condition filterCondition = current.filter(); + List allColumns = new ArrayList<>(current.schemaReference().fieldValues()); + Selection selection = Selection.builder() + .source(current.datasetReference()) + .addAllFields(allColumns) + .condition(filterCondition) + .alias(current.datasetReference().alias()) + .build(); + return new SelectionVisitor().visit(prev, selection, context); + } +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java index 81934718367..5e574ac08de 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java @@ -19,6 +19,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.SelectStatement; @@ -59,6 +60,13 @@ select id from (select * from table where condition) conditions.add(filterCondition); logicalPlanNodeList.add(derivedDataset.datasetReference()); } + else if (dataset instanceof FilteredDataset) + { + FilteredDataset filteredDataset = (FilteredDataset) dataset; + Condition filterCondition = filteredDataset.filter(); + conditions.add(filterCondition); + logicalPlanNodeList.add(filteredDataset.datasetReference()); + } else { logicalPlanNodeList.add(dataset); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/ShowVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/ShowVisitor.java index 70e3554d261..ba8c5d6bb72 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/ShowVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/ShowVisitor.java @@ -27,17 +27,10 @@ public class ShowVisitor implements LogicalPlanVisitor @Override public VisitorResult visit(PhysicalPlanNode prev, Show current, VisitorContext context) { - ShowCommand command; - if (current.dataset().datasetReference().group().isPresent()) - { - command = new ShowCommand( - ShowType.valueOf(current.operation().name()), - current.dataset().datasetReference().group().orElse(null)); - } - else - { - command = new ShowCommand(ShowType.valueOf(current.operation().name())); - } + ShowCommand command = new ShowCommand( + ShowType.valueOf(current.operation().name()), + current.dataset().datasetReference().group(), + context.quoteIdentifier()); prev.push(command); return new VisitorResult(null); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 305698ea828..f05009e2402 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -22,6 +22,7 @@ import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.testcases.ingestmode.nontemporal.AppendOnlyTestCases; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.List; @@ -254,6 +255,31 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); } + + @Override + @Test + public void testAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration() + { + // Digest UDF Generation not available for ANSI sink + } + + @Override + public void verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(GeneratorResult operations) + { + } + + @Override + @Test + public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration() + { + // Digest UDF Generation not available for ANSI sink + } + + @Override + public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(List generatorResults, List dataSplitRanges) + { + } + public RelationalSink getRelationalSink() { return AnsiSqlSink.get(); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index 7bc27355818..15d83b7cfe2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -365,6 +365,36 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); } + @Override + public void verifyNontemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + + String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))))," + + "sink.\"name\" = (SELECT stage.\"name\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) 
AND ((stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))))," + + "sink.\"amount\" = (SELECT stage.\"amount\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))))," + + "sink.\"biz_date\" = (SELECT stage.\"biz_date\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))))," + + "sink.\"digest\" = (SELECT stage.\"digest\" FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05')))) " + + "WHERE EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\")) AND ((stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))))"; + + String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage WHERE (NOT (EXISTS " + + "(SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"id\" = stage.\"id\") AND " + + "(sink.\"name\" = stage.\"name\")))) AND ((stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))))"; + + 
Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); + Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))"; + // Stats + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + @Override public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java index f95df9e8b3d..270cc490e48 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java @@ -297,6 +297,36 @@ public void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithStagingFilters("{\"batch_id_in\":{\"GT\":5}}"), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + + "WHERE (sink.\"batch_id_out\" = 999999999) AND " + + "(EXISTS (SELECT * FROM \"mydb\".\"staging\" as stage " + + "WHERE (((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND " + + "(sink.\"digest\" <> stage.\"digest\")) AND (stage.\"batch_id_in\" > 5)))"; + + String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + + "(SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging\" as stage " + + "WHERE (NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + + "AND (sink.\"digest\" = stage.\"digest\") AND ((sink.\"id\" = stage.\"id\") AND " + + "(sink.\"name\" = 
stage.\"name\"))))) AND (stage.\"batch_id_in\" > 5))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations) { @@ -343,6 +373,52 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithStagingFilters("{\"batch_id_in\":{\"GT\":5}}"), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 " + + "FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + + "WHERE (sink.\"batch_id_out\" = 999999999) AND (EXISTS " + + "(SELECT * FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (stage.\"version\" > sink.\"version\")))"; + + String expectedUpsertQuery = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"batch_id_in\", \"batch_id_out\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + "(SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'),999999999 FROM \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "as stage WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = 999999999) " + + "AND (stage.\"version\" <= sink.\"version\") AND ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")))))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + Assertions.assertEquals(expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"version\", \"legend_persistence_count\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + "stage.\"legend_persistence_count\" as \"legend_persistence_count\" FROM " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\",stage.\"version\"," + + "stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER " + + "(PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"version\" DESC) as \"legend_persistence_rank\" " + + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\"," + + "stage.\"version\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + + "WHERE stage.\"batch_id_in\" > 5 GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\", " + + "stage.\"digest\", stage.\"version\") as stage) as stage WHERE stage.\"legend_persistence_rank\" = 1)"; + + 
Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(dataErrorCheckSql, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index 62c3df96ff0..24d4174b2f4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -30,6 +30,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import 
org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Truncate; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; @@ -62,6 +63,7 @@ import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.FieldVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.PartitionKeyVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.SQLCreateVisitor; +import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.SQLDropVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.SchemaDefinitionVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.StagedFilesDatasetReferenceVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.StagedFilesDatasetVisitor; @@ -107,6 +109,7 @@ public class BigQuerySink extends AnsiSqlSink Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); logicalPlanVisitorByClass.put(SchemaDefinition.class, new SchemaDefinitionVisitor()); logicalPlanVisitorByClass.put(Create.class, new SQLCreateVisitor()); + logicalPlanVisitorByClass.put(Drop.class, new SQLDropVisitor()); logicalPlanVisitorByClass.put(ClusterKey.class, new ClusterKeyVisitor()); logicalPlanVisitorByClass.put(PartitionKey.class, new PartitionKeyVisitor()); logicalPlanVisitorByClass.put(Alter.class, new AlterVisitor()); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java index 261226baa04..d5e8a34de4e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java @@ -160,7 +160,7 @@ public boolean doesTableExist(Dataset dataset) public void validateDatasetSchema(Dataset dataset, TypeMapping typeMapping) { - if (!(typeMapping instanceof DataTypeMapping)) + if (!(typeMapping instanceof DataTypeMapping)) { throw new IllegalStateException("Only DataTypeMapping allowed in validateDatasetSchema"); } @@ -184,7 +184,19 @@ public void validateDatasetSchema(Dataset dataset, TypeMapping typeMapping) Integer columnSize = Objects.nonNull(dbField.getMaxLength()) ? Integer.valueOf(dbField.getMaxLength().intValue()) : Objects.nonNull(dbField.getPrecision()) ? Integer.valueOf(dbField.getPrecision().intValue()) : null; Integer scale = Objects.nonNull(dbField.getScale()) ? 
Integer.valueOf(dbField.getScale().intValue()) : null; - org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType matchedDataType = org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.valueOf(typeName.toUpperCase()); + org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType matchedDataType = null; + for (org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType dataType : org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.values()) + { + if (dataType.name().equalsIgnoreCase(typeName)) + { + matchedDataType = dataType; + break; + } + } + if (matchedDataType == null) + { + matchedDataType = org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType.valueOf(dbField.getType().name()); + } FieldType fieldType = FieldType.of(matchedDataType, columnSize, scale); DataType physicalDataType = datatypeMapping.getDataType(fieldType); @@ -213,7 +225,7 @@ public Dataset constructDatasetFromDatabase(Dataset dataset, TypeMapping typeMap String tableName = dataset.datasetReference().name().orElseThrow(IllegalStateException::new); String schemaName = dataset.datasetReference().group().orElse(null); String databaseName = dataset.datasetReference().database().orElse(null); - if (!(typeMapping instanceof JdbcPropertiesToLogicalDataTypeMapping)) + if (!(typeMapping instanceof JdbcPropertiesToLogicalDataTypeMapping)) { throw new IllegalStateException("Only JdbcPropertiesToLogicalDataTypeMapping allowed in constructDatasetFromDatabase"); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java index 088fed20cb4..564e0a1f83e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java @@ -15,13 +15,11 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets; -import org.finos.legend.engine.persistence.components.common.FileFormat; -import org.finos.legend.engine.persistence.components.common.LoadOptions; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; -import java.util.Optional; - +import java.util.Map; @Value.Immutable @Value.Style( @@ -33,7 +31,7 @@ ) public interface BigQueryStagedFilesDatasetPropertiesAbstract extends StagedFilesDatasetProperties { - FileFormat fileFormat(); + FileFormatType fileFormat(); - Optional loadOptions(); + Map loadOptions(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DatetimeValueVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DatetimeValueVisitor.java index 939b782f595..ecbd4165707 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DatetimeValueVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DatetimeValueVisitor.java @@ -26,7 +26,7 @@ public class DatetimeValueVisitor implements LogicalPlanVisitor { - private static final String DATE_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"; + private static final String DATE_TIME_FORMAT = "%Y-%m-%d %H:%M:%E6S"; @Override public VisitorResult visit(PhysicalPlanNode prev, DatetimeValue current, VisitorContext context) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/SQLDropVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/SQLDropVisitor.java new file mode 100644 index 00000000000..8bc82df6867 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/SQLDropVisitor.java @@ -0,0 +1,47 @@ +// Copyright 2023 Goldman Sachs +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.modifiers.IfExistsTableModifier; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements.DropTable; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class SQLDropVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, Drop current, VisitorContext context) + { + DropTable dropTable = new DropTable(); + prev.push(dropTable); + + List logicalPlanNodes = new ArrayList<>(); + logicalPlanNodes.add(current.dataset()); + + if (current.ifExists()) + { + logicalPlanNodes.add(IfExistsTableModifier.INSTANCE); + } + + return new VisitorResult(dropTable, logicalPlanNodes); + } +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java index 0c5e7d91bc5..a8cae186d4b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -14,8 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; -import org.finos.legend.engine.persistence.components.common.FileFormat; -import org.finos.legend.engine.persistence.components.common.LoadOptions; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; @@ -24,6 +23,7 @@ import org.finos.legend.engine.persistence.components.transformer.VisitorContext; import java.util.HashMap; +import java.util.List; import java.util.Map; @@ -38,39 +38,13 @@ public VisitorResult visit(PhysicalPlanNode prev, 
StagedFilesDatasetReference cu } BigQueryStagedFilesDatasetProperties datasetProperties = (BigQueryStagedFilesDatasetProperties) current.properties(); - Map loadOptionsMap = new HashMap<>(); - FileFormat fileFormat = datasetProperties.fileFormat(); - loadOptionsMap.put("format", fileFormat.name()); - datasetProperties.loadOptions().ifPresent(options -> retrieveLoadOptions(fileFormat, options, loadOptionsMap)); - - StagedFilesTable stagedFilesTable = new StagedFilesTable(datasetProperties.files(), loadOptionsMap); + FileFormatType fileFormatType = datasetProperties.fileFormat(); + Map loadOptionsMap = new HashMap<>(datasetProperties.loadOptions()); + loadOptionsMap.put("format", fileFormatType.name()); + List uris = datasetProperties.filePaths().isEmpty() ? datasetProperties.filePatterns() : datasetProperties.filePaths(); + StagedFilesTable stagedFilesTable = new StagedFilesTable(uris, loadOptionsMap); prev.push(stagedFilesTable); return new VisitorResult(null); } - - private void retrieveLoadOptions(FileFormat fileFormat, LoadOptions loadOptions, Map loadOptionsMap) - { - switch (fileFormat) - { - case CSV: - loadOptions.fieldDelimiter().ifPresent(property -> loadOptionsMap.put("field_delimiter", property)); - loadOptions.encoding().ifPresent(property -> loadOptionsMap.put("encoding", property)); - loadOptions.nullMarker().ifPresent(property -> loadOptionsMap.put("null_marker", property)); - loadOptions.quote().ifPresent(property -> loadOptionsMap.put("quote", property)); - loadOptions.skipLeadingRows().ifPresent(property -> loadOptionsMap.put("skip_leading_rows", property)); - loadOptions.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); - loadOptions.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); - break; - case JSON: - loadOptions.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); - loadOptions.compression().ifPresent(property -> 
loadOptionsMap.put("compression", property)); - break; - case AVRO: - case PARQUET: - return; - default: - throw new IllegalStateException("Unrecognized file format: " + fileFormat); - } - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/expressions/table/StagedFilesTable.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/expressions/table/StagedFilesTable.java index 2a8d288eeb4..ba96cfb5ba4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/expressions/table/StagedFilesTable.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/expressions/table/StagedFilesTable.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.ASSIGNMENT_OPERATOR; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; @@ -68,7 +69,7 @@ public void genSql(StringBuilder builder) throws SqlDomException builder.append(COMMA); builder.append(WHITE_SPACE); int ctr = 0; - for (String option : loadOptions.keySet()) + for (String option : loadOptions.keySet().stream().sorted().collect(Collectors.toList())) { ctr++; builder.append(option); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/DropTable.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/DropTable.java new file mode 100644 index 00000000000..7377308689f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/DropTable.java @@ -0,0 +1,85 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; +import org.finos.legend.engine.persistence.components.relational.sqldom.modifiers.TableModifier; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DDLStatement; + +import java.util.ArrayList; +import java.util.List; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class DropTable implements DDLStatement +{ + private Table table; + private List modifiers = new ArrayList<>(); + + public DropTable() + { + } + + public DropTable(Table table, List modifiers) + { + this.table = table; + this.modifiers = modifiers; + } + + /* + DROP GENERIC PLAN: + DROP TABLE NAME [modifiers] + */ + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + validate(); + builder.append(Clause.DROP.get()); + builder.append(WHITE_SPACE); + builder.append(Clause.TABLE.get()); + + // modifiers + SqlGen.genSqlList(builder, modifiers, WHITE_SPACE, WHITE_SPACE); + + // Table name + builder.append(WHITE_SPACE); + table.genSqlWithoutAlias(builder); + } + + + @Override + public void push(Object node) + { + if (node instanceof Table) + { + table = ((Table) node); + } + else if (node instanceof TableModifier) + { + modifiers.add((TableModifier) node); + } + } + + void validate() throws SqlDomException + { + if (table == null) + { + throw new SqlDomException("Table is mandatory for Drop Table Command"); + } + } +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyExecutorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyExecutorTest.java index c1fec8890b9..e8feb183d24 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyExecutorTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyExecutorTest.java @@ -20,6 +20,9 @@ import org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; @@ -40,7 +43,8 @@ public class AppendOnlyExecutorTest extends BigQueryEndToEndTest public void testMilestoning() throws IOException, InterruptedException { AppendOnly ingestMode = AppendOnly.builder() - .digestField("digest") + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) 
+ .filterExistingRecords(true) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField("audit_ts").build()) .build(); @@ -83,4 +87,68 @@ public void testMilestoning() throws IOException, InterruptedException Assertions.assertEquals(3, incomingRecords); Assertions.assertEquals(2, rowsInserted); } + + @Test + public void testMilestoningWithDigestGeneration() throws IOException, InterruptedException + { + AppendOnly ingestMode = AppendOnly.builder() + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName("demo.LAKEHOUSE_MD5").digestField(digestName).build()) + .auditing(DateTimeAuditing.builder().dateTimeField("audit_ts").build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); + + SchemaDefinition stagingSchema = SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(amount) + .addFields(bizDate) + .addFields(insertTimestamp) + .build(); + + // Clean up + delete("demo", "main"); + delete("demo", "staging"); + + // Register UDF + runQuery("DROP FUNCTION IF EXISTS demo.stringifyJson;"); + runQuery("DROP FUNCTION IF EXISTS demo.LAKEHOUSE_MD5;"); + runQuery("CREATE FUNCTION demo.stringifyJson(json_data JSON)\n" + + " RETURNS STRING\n" + + " LANGUAGE js AS \"\"\"\n" + + " let output = \"\"; \n" + + " Object.keys(json_data).sort().filter(field => json_data[field] != null).forEach(field => { output += field; output += json_data[field];})\n" + + " return output;\n" + + " \"\"\"; \n"); + runQuery("CREATE FUNCTION demo.LAKEHOUSE_MD5(json_data JSON)\n" + + "AS (\n" + + " TO_HEX(MD5(demo.stringifyJson(json_data)))\n" + + ");\n"); + + // Pass 1 + System.out.println("--------- Batch 1 started ------------"); + String pathPass1 = "src/test/resources/input/digest_generation/data_pass3.csv"; + DatasetFilter stagingFilter = DatasetFilter.of("insert_ts", FilterType.EQUAL_TO, "2023-01-01 00:00:00"); + IngestorResult result = ingestViaExecutor(ingestMode, stagingSchema, 
stagingFilter, pathPass1, fixedClock_2000_01_01); + + // Verify + List> tableData = runQuery("select * from `demo`.`main` order by name asc"); + String expectedPath = "src/test/resources/expected/append/digest_generation/data_pass3.csv"; + String [] schema = new String[] {"id", "name", "amount", "biz_date", "insert_ts", "digest", "audit_ts"}; + assertFileAndTableDataEquals(schema, expectedPath, tableData); + long incomingRecords = (long) result.statisticByName().get(INCOMING_RECORD_COUNT); + Assertions.assertEquals(3, incomingRecords); + + // Pass 2 + System.out.println("--------- Batch 2 started ------------"); + String pathPass2 = "src/test/resources/input/digest_generation/data_pass4.csv"; + stagingFilter = DatasetFilter.of("insert_ts", FilterType.EQUAL_TO, "2023-01-02 00:00:00"); + result = ingestViaExecutor(ingestMode, stagingSchema, stagingFilter, pathPass2, fixedClock_2000_01_02); + + // Verify + tableData = runQuery("select * from `demo`.`main` order by name asc, insert_ts"); + expectedPath = "src/test/resources/expected/append/digest_generation/data_pass4.csv"; + assertFileAndTableDataEquals(schema, expectedPath, tableData); + incomingRecords = (long) result.statisticByName().get(INCOMING_RECORD_COUNT); + Assertions.assertEquals(3, incomingRecords); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyGeneratorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyGeneratorTest.java index 6ffa34b0909..b9158ee56b1 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyGeneratorTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/AppendOnlyGeneratorTest.java @@ -18,7 +18,10 @@ import org.finos.legend.engine.persistence.components.common.FilterType; import org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; -import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -34,8 +37,8 @@ public class AppendOnlyGeneratorTest extends BigQueryEndToEndTest public void testMilestoning() throws IOException, InterruptedException { AppendOnly ingestMode = AppendOnly.builder() - .digestField("digest") - .deduplicationStrategy(FilterDuplicates.builder().build()) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) + .filterExistingRecords(true) .auditing(DateTimeAuditing.builder().dateTimeField("audit_ts").build()) .build(); @@ -66,4 +69,62 @@ public void testMilestoning() throws IOException, InterruptedException expectedPath = "src/test/resources/expected/append/data_pass2.csv"; assertFileAndTableDataEquals(schema, expectedPath, tableData); } + + @Test + public 
void testMilestoningWithDigestGeneration() throws IOException, InterruptedException + { + AppendOnly ingestMode = AppendOnly.builder() + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName("demo.LAKEHOUSE_MD5").digestField(digestName).build()) + .auditing(NoAuditing.builder().build()) + .build(); + + SchemaDefinition stagingSchema = SchemaDefinition.builder() + .addFields(nameNonPk) + .addFields(amount) + .addFields(bizDate) + .addFields(insertTimestamp) + .build(); + + // Clean up + delete("demo", "main"); + delete("demo", "staging"); + + // Register UDF + runQuery("DROP FUNCTION IF EXISTS demo.stringifyJson;"); + runQuery("DROP FUNCTION IF EXISTS demo.LAKEHOUSE_MD5;"); + runQuery("CREATE FUNCTION demo.stringifyJson(json_data JSON)\n" + + " RETURNS STRING\n" + + " LANGUAGE js AS \"\"\"\n" + + " let output = \"\"; \n" + + " Object.keys(json_data).sort().filter(field => json_data[field] != null).forEach(field => { output += field; output += json_data[field];})\n" + + " return output;\n" + + " \"\"\"; \n"); + runQuery("CREATE FUNCTION demo.LAKEHOUSE_MD5(json_data JSON)\n" + + "AS (\n" + + " TO_HEX(MD5(demo.stringifyJson(json_data)))\n" + + ");\n"); + + // Pass 1 + System.out.println("--------- Batch 1 started ------------"); + String pathPass1 = "src/test/resources/input/digest_generation/data_pass1.csv"; + DatasetFilter stagingFilter = DatasetFilter.of("insert_ts", FilterType.EQUAL_TO, "2023-01-01 00:00:00"); + ingestViaGenerator(ingestMode, stagingSchema, stagingFilter, pathPass1, fixedClock_2000_01_01); + + // Verify + List> tableData = runQuery("select * from `demo`.`main` order by name asc"); + String expectedPath = "src/test/resources/expected/append/digest_generation/data_pass1.csv"; + String [] schema = new String[] {"name", "amount", "biz_date", "insert_ts", "digest"}; + assertFileAndTableDataEquals(schema, expectedPath, tableData); + + // Pass 2 + System.out.println("--------- Batch 2 started ------------"); + String pathPass2 = 
"src/test/resources/input/digest_generation/data_pass2.csv"; + stagingFilter = DatasetFilter.of("insert_ts", FilterType.EQUAL_TO, "2023-01-02 00:00:00"); + ingestViaGenerator(ingestMode, stagingSchema, stagingFilter, pathPass2, fixedClock_2000_01_02); + + // Verify + tableData = runQuery("select * from `demo`.`main` order by name asc, insert_ts"); + expectedPath = "src/test/resources/expected/append/digest_generation/data_pass2.csv"; + assertFileAndTableDataEquals(schema, expectedPath, tableData); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java index b4655ac3a60..ffb9b341902 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BigQueryEndToEndTest.java @@ -76,6 +76,7 @@ public class BigQueryEndToEndTest protected static String digestName = "digest"; protected Field id = Field.builder().name("id").type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())).primaryKey(true).build(); protected Field name = Field.builder().name("name").type(FieldType.of(DataType.VARCHAR, Optional.empty(), Optional.empty())).primaryKey(true).build(); + protected Field nameNonPk = Field.builder().name("name").type(FieldType.of(DataType.VARCHAR, Optional.empty(), Optional.empty())).build(); protected Field 
amount = Field.builder().name("amount").type(FieldType.of(DataType.INTEGER, Optional.empty(), Optional.empty())).build(); protected Field bizDate = Field.builder().name("biz_date").type(FieldType.of(DataType.DATE, Optional.empty(), Optional.empty())).build(); protected Field digest = Field.builder().name(digestName).type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())).build(); @@ -134,7 +135,7 @@ public class BigQueryEndToEndTest .build(); protected IngestorResult ingestViaExecutorAndVerifyStagingFilters(IngestMode ingestMode, SchemaDefinition stagingSchema, - DatasetFilter stagingFilter, String path, Clock clock, boolean VerifyStagingFilters) throws IOException, InterruptedException + DatasetFilter stagingFilter, String path, Clock clock, boolean needToVerifyStagingFilters) throws IOException, InterruptedException { RelationalIngestor ingestor = RelationalIngestor.builder() .ingestMode(ingestMode) @@ -158,7 +159,10 @@ protected IngestorResult ingestViaExecutorAndVerifyStagingFilters(IngestMode ing RelationalConnection connection = BigQueryConnection.of(getBigQueryConnection()); IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets).get(0); - verifyStagingFilters(ingestor, connection, datasets); + if (needToVerifyStagingFilters) + { + verifyStagingFilters(ingestor, connection, datasets); + } return ingestorResult; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java index 559c2a0f5db..a608a8329e1 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java @@ -15,8 +15,7 @@ package org.finos.legend.engine.persistence.components.e2e; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.FileFormat; -import org.finos.legend.engine.persistence.components.common.LoadOptions; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; @@ -40,10 +39,7 @@ import org.junit.jupiter.api.Test; import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_WITH_ERRORS; @@ -89,8 +85,8 @@ public void testMilestoning() throws IOException, InterruptedException Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(FILE_LIST).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(FILE_LIST).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -116,7 +112,7 @@ public void testMilestoning() 
throws IOException, InterruptedException .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue(TASK_ID_VALUE) + .bulkLoadEventIdValue(TASK_ID_VALUE) .build(); RelationalConnection connection = BigQueryConnection.of(getBigQueryConnection()); @@ -144,12 +140,14 @@ public void testMilestoningFailure() throws IOException, InterruptedException .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); + Map loadOptions = new HashMap<>(); + loadOptions.put("max_bad_records", 10L); Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .loadOptions(LoadOptions.builder().maxBadRecords(10L).build()) - .addAllFiles(BAD_FILE_LIST).build()) + .fileFormat(FileFormatType.CSV) + .putAllLoadOptions(loadOptions) + .addAllFilePaths(BAD_FILE_LIST).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java index 410dabb69e6..c77230b7576 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.e2e; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; @@ -81,8 +81,8 @@ public void testMilestoning() throws IOException, InterruptedException Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(FILE_LIST).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(FILE_LIST).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -108,7 +108,7 @@ public void testMilestoning() throws IOException, InterruptedException .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue(TASK_ID_VALUE) + .bulkLoadEventIdValue(TASK_ID_VALUE) .bulkLoadBatchStatusPattern("{STATUS}") .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index d0995b05d59..662d73d7cf9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -18,8 +18,12 @@ import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; +import org.finos.legend.engine.persistence.components.scenarios.AppendOnlyScenarios; +import org.finos.legend.engine.persistence.components.scenarios.TestScenario; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.List; @@ -64,7 +68,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExisti { String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "FROM 
`mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; @@ -99,7 +103,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesNoVersioningWithFilterEx List deduplicationAndVersioningSql = queries.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -131,7 +135,7 @@ public void verifyAppendOnlyWithAuditingFilterDuplicatesAllVersionWithFilterExis { String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + @@ -169,7 +173,7 @@ public void verifyAppendOnlyWithUpperCaseOptimizer(GeneratorResult operations) 
String insertSql = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_UPDATE_TIME`) " + - "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage " + "WHERE NOT (EXISTS " + "(SELECT * FROM `MYDB`.`MAIN` as sink WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; @@ -184,7 +188,7 @@ public void verifyAppendOnlyWithLessColumnsInStaging(GeneratorResult operations) List milestoningSqlList = operations.ingestSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -200,7 +204,7 @@ public void verifyAppendOnlyWithAuditingFailOnDuplicatesMaxVersionWithFilterExis List deduplicationAndVersioningSql = operations.deduplicationAndVersioningSql(); String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM 
`mydb`.`staging_legend_persistence_temp_staging` as stage " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -231,7 +235,7 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTablePlusDigestWithCount, preActionsSqlList.get(1)); @@ -250,4 +254,87 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); } + + @Override + @Test + public void testAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration() + { + TestScenario scenario = new AppendOnlyScenarios().NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION(); + RelationalGenerator generator = 
RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(operations); + } + + @Override + public void verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + + String insertSql = "INSERT INTO `mydb`.`main` (`id`, `name`, `amount`, `biz_date`, `digest`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,LAKEHOUSE_MD5(TO_JSON(stage)) FROM `mydb`.`staging` as stage)"; + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableCreateQueryWithNoPKs, preActionsSqlList.get(0)); + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertNull(operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); + } + + @Override + @Test + public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration() + { + TestScenario scenario = new AppendOnlyScenarios().WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION(); + RelationalGenerator generator = RelationalGenerator.builder() + 
.ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); + verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(operations, dataSplitRangesOneToTwo); + } + + @Override + public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(List generatorResults, List dataSplitRanges) + { + String insertSql = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,LAKEHOUSE_MD5(TO_JSON(stage)),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithCountAndDataSplit, generatorResults.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, generatorResults.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates, generatorResults.get(0).deduplicationAndVersioningSql().get(1)); + + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), generatorResults.get(0).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), 
generatorResults.get(1).ingestSql().get(0)); + Assertions.assertEquals(2, generatorResults.size()); + + // Stats + String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String rowsInserted = "SELECT COUNT(*) as `rowsInserted` FROM `mydb`.`main` as sink WHERE sink.`batch_update_time` = (SELECT MAX(sink.`batch_update_time`) FROM `mydb`.`main` as sink)"; + + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsDeleted, generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertEquals(enrichSqlWithDataSplits(rowsInserted, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); + Assertions.assertEquals(rowsTerminated, generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java index 51f7e0eec10..ea28d0d8cf2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BigQueryTestArtifacts.java @@ -60,6 +60,15 @@ public class BigQueryTestArtifacts "`biz_date` DATE," + "`legend_persistence_count` INT64)"; + public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INT64 NOT NULL," + + "`name` STRING NOT NULL," + + "`amount` FLOAT64," + + "`biz_date` DATE," + + "`digest` STRING," + + "`version` INT64," + + "`legend_persistence_count` INT64)"; + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + "(`id` INT64 NOT NULL," + "`name` STRING NOT NULL," + @@ -77,6 +86,14 @@ public class BigQueryTestArtifacts "`legend_persistence_count` INT64," + "`data_split` INT64 NOT NULL)"; + public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INT64 NOT NULL," + + "`name` STRING NOT NULL," + + "`amount` FLOAT64," + + "`biz_date` DATE," + + "`legend_persistence_count` INT64," + + "`data_split` INT64 NOT NULL)"; + public static String expectedBaseTablePlusDigestPlusVersionCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INT64 NOT NULL," + 
"`name` STRING NOT NULL," + @@ -167,7 +184,7 @@ public class BigQueryTestArtifacts public static String expectedTempStagingCleanupQuery = "DELETE FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE 1 = 1"; - public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging` CASCADE"; + public static String expectedDropTableQuery = "DROP TABLE IF EXISTS `mydb`.`staging`"; public static String cleanUpMainTableSql = "DELETE FROM `mydb`.`main` as sink WHERE 1 = 1"; public static String cleanupMainTableSqlUpperCase = "DELETE FROM `MYDB`.`MAIN` as sink WHERE 1 = 1"; @@ -197,19 +214,19 @@ public class BigQueryTestArtifacts "`BATCH_ID_IN` INT64 NOT NULL,`BATCH_ID_OUT` INT64,PRIMARY KEY (`ID`, `NAME`, `BATCH_ID_IN`) NOT ENFORCED)"; public static String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`)" + - " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'DONE')"; + " (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'DONE')"; public static String expectedMetadataTableIngestWithStagingFiltersQuery = "INSERT INTO batch_metadata " + "(`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `staging_filters`) " + "(SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')," + + "WHERE UPPER(batch_metadata.`table_name`) = 
'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')," + "CURRENT_DATETIME(),'DONE',PARSE_JSON('{\"batch_id_in\":{\"GT\":5}}'))"; public static String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`)" + - " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'DONE')"; + " (SELECT 'MAIN',(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'DONE')"; public static String expectedMetadataTableIngestQueryWithPlaceHolders = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) " + - "(SELECT 'main',{BATCH_ID_PATTERN},PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TS_PATTERN}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_END_TS_PATTERN}'),'DONE')"; + "(SELECT 'main',{BATCH_ID_PATTERN},PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_START_TS_PATTERN}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_END_TS_PATTERN}'),'DONE')"; public static String expectedMainTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INT64 NOT NULL," + @@ -471,13 +488,23 @@ public class BigQueryTestArtifacts "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; + public static String expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` 
" + + "(`id`, `name`, `amount`, `biz_date`, `legend_persistence_count`, `data_split`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`) as stage)"; + public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + "`mydb`.`staging_legend_persistence_temp_staging` as stage"; - public static String dataErrorCheckSql = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + + public static String dataErrorCheckSqlForBizDateAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + public static String dataErrorCheckSqlForVersionAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version`) as stage"; + public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE 1 = 1"; public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `LEGEND_PERSISTENCE_COUNT`) " + @@ -497,4 +524,9 @@ public class BigQueryTestArtifacts "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + + public static String getDropTempTableQuery(String tableName) + { + return String.format("DROP TABLE IF EXISTS %s", tableName); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java index cc61379a451..8bfe3ec190a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java @@ -71,7 +71,7 @@ public void verifyBitemporalDeltaBatchIdDateTimeBasedNoDeleteIndWithDataSplits(L { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "WHERE UPPER(batch_metadata.`table_name`) = 
'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + "((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -81,7 +81,7 @@ public void verifyBitemporalDeltaBatchIdDateTimeBasedNoDeleteIndWithDataSplits(L "`validity_through_target`, `digest`, `version`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`validity_through_reference`," + "stage.`digest`,stage.`version`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + - "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE " + "(sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -153,8 +153,8 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) 
AND " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND " + "(stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + @@ -165,9 +165,9 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) AND " + @@ -185,10 +185,10 @@ public void verifyBitemporalDeltaDatetimeBasedWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -206,7 +207,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as 
`legend_persistence_start_date` FROM `mydb`.`staging` as stage WHERE stage.`delete_indicator` NOT IN ('yes','1','true')) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -275,7 +276,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator String expectedTempToMainForDeletion = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`legend_persistence_start_date` as `legend_persistence_start_date`,MAX(legend_persistence_y.`validity_through_target`) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + "FROM `mydb`.`tempWithDeleteIndicator` as legend_persistence_x " + "LEFT OUTER JOIN `mydb`.`tempWithDeleteIndicator` as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND 
(legend_persistence_y.`validity_from_target` > legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + @@ -346,7 +347,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -415,7 +416,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + @@ -483,7 +484,7 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplitsFilterDuplic "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM `mydb`.`stagingWithoutDuplicates` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -561,7 +562,7 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl "LEFT OUTER JOIN " + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM `mydb`.`stagingWithoutDuplicates` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -659,7 +660,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplitsFilterDupl "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT 
legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM `mydb`.`stagingWithoutDuplicates` as stage WHERE stage.`delete_indicator` NOT IN ('yes','1','true')) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -728,7 +729,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplitsFilterDupl String expectedTempToMainForDeletion = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`legend_persistence_start_date` as `legend_persistence_start_date`,MAX(legend_persistence_y.`validity_through_target`) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as 
`legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + "FROM `mydb`.`tempWithDeleteIndicator` as legend_persistence_x " + "LEFT OUTER JOIN `mydb`.`tempWithDeleteIndicator` as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_y.`validity_from_target` > legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + @@ -819,7 +820,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM " + stageWithoutDuplicatesName + " as legend_persistence_stageWithoutDuplicates WHERE (legend_persistence_stageWithoutDuplicates.`delete_indicator` NOT IN ('yes','1','true')) AND ((legend_persistence_stageWithoutDuplicates.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (legend_persistence_stageWithoutDuplicates.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))) as 
legend_persistence_x " + "LEFT OUTER JOIN " + @@ -888,7 +889,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu String expectedTempToMainForDeletion = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `version`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`legend_persistence_start_date` as `legend_persistence_start_date`,MAX(legend_persistence_y.`validity_through_target`) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`version`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`validity_from_target`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date`,legend_persistence_x.`batch_id_in`,legend_persistence_x.`batch_id_out` " + "FROM " + tempWithDeleteIndicatorName + " as legend_persistence_x " + "LEFT OUTER JOIN " + tempWithDeleteIndicatorName + " as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND 
(legend_persistence_y.`validity_from_target` > legend_persistence_x.`validity_from_target`) AND (legend_persistence_y.`delete_indicator` = 0) " + @@ -958,7 +959,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithPlaceholders(GeneratorResult op "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1024,13 +1025,13 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`," + - "legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE 
UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "legend_persistence_y.`legend_persistence_end_date`,(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1046,7 +1047,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedMainToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_id_in`, 
`batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM " + "(SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_id_in`,sink.`batch_id_out`,sink.`batch_time_in`," + "sink.`batch_time_out`,sink.`validity_from_target`,sink.`validity_through_target` FROM `mydb`.`main` as sink " + @@ -1070,7 +1071,7 @@ public void verifyBitemporalDeltaBatchIdAndTimeBasedNoDeleteIndNoDataSplits(Gene String expectedUpdateMain = "UPDATE `mydb`.`main` as sink SET " + "sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (EXISTS " + "(SELECT * FROM `mydb`.`temp` as temp WHERE ((sink.`id` = temp.`id`) AND (sink.`name` = temp.`name`)) " + "AND (sink.`validity_from_target` = temp.`validity_from_target`))) AND (sink.`batch_id_out` = 999999999)"; @@ -1105,17 +1106,17 @@ public void 
verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR String expectedStageToTemp = "INSERT INTO `mydb`.`temp` " + "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`,legend_persistence_x.`validity_from_reference` as `legend_persistence_start_date`," + - "legend_persistence_y.`legend_persistence_end_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "legend_persistence_y.`legend_persistence_end_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`validity_from_reference`,stage.`digest` FROM `mydb`.`staging` as stage) as legend_persistence_x " + "LEFT OUTER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),MIN(legend_persistence_x.`legend_persistence_end_date`)) as `legend_persistence_end_date` " + "FROM " + - "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`,COALESCE(MIN(legend_persistence_y.`legend_persistence_start_date`),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as `legend_persistence_end_date` " + "FROM " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM `mydb`.`staging` as stage) as 
legend_persistence_x " + "LEFT OUTER JOIN " + - "(SELECT `id`,`name`,`validity_from_target` as `legend_persistence_start_date` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as legend_persistence_y " + + "(SELECT `id`,`name`,`validity_from_target` as `legend_persistence_start_date` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) AND (legend_persistence_x.`legend_persistence_start_date` < legend_persistence_y.`legend_persistence_start_date`) " + "GROUP BY legend_persistence_x.`id`, legend_persistence_x.`name`, legend_persistence_x.`legend_persistence_start_date`) as legend_persistence_x " + "LEFT OUTER JOIN " + @@ -1128,16 +1129,16 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "(`id`, `name`, `amount`, `digest`, `validity_from_target`, `validity_through_target`, `batch_time_in`, `batch_time_out`) " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`amount`,legend_persistence_x.`digest`," + "legend_persistence_x.`validity_from_target` as `legend_persistence_start_date`,legend_persistence_y.`legend_persistence_end_date`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM (SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_time_in`," + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM (SELECT sink.`id`,sink.`name`,sink.`amount`,sink.`digest`,sink.`batch_time_in`," + "sink.`batch_time_out`,sink.`validity_from_target`,sink.`validity_through_target` " + - "FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = 
PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as legend_persistence_x " + + "FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as legend_persistence_x " + "INNER JOIN " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`," + "legend_persistence_x.`legend_persistence_end_date` as `legend_persistence_end_date` FROM " + "(SELECT legend_persistence_x.`id`,legend_persistence_x.`name`,legend_persistence_x.`legend_persistence_start_date`," + "MIN(legend_persistence_y.`legend_persistence_start_date`) as `legend_persistence_end_date` " + "FROM (SELECT `id`,`name`,`validity_from_target` as `legend_persistence_start_date`,`validity_through_target` as `legend_persistence_end_date` " + - "FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) as legend_persistence_x " + + "FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) as legend_persistence_x " + "INNER JOIN " + "(SELECT `id`,`name`,`validity_from_reference` as `legend_persistence_start_date` FROM `mydb`.`staging` as stage) as legend_persistence_y " + "ON ((legend_persistence_x.`id` = legend_persistence_y.`id`) AND (legend_persistence_x.`name` = legend_persistence_y.`name`)) " + @@ -1151,10 +1152,10 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR "AND (legend_persistence_x.`validity_from_target` = legend_persistence_y.`legend_persistence_start_date`))"; String expectedUpdateMain = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (EXISTS (SELECT * FROM `mydb`.`temp` as temp WHERE " + "((sink.`id` = temp.`id`) AND (sink.`name` = 
temp.`name`)) AND " + - "(sink.`validity_from_target` = temp.`validity_from_target`))) AND (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))"; + "(sink.`validity_from_target` = temp.`validity_from_target`))) AND (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59'))"; String expectedTempToMain = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `digest`, `batch_time_in`, `batch_time_out`, `validity_from_target`, `validity_through_target`) " + @@ -1173,8 +1174,8 @@ public void verifyBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits(GeneratorR Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 4d8899447e4..801e8dcc7ac 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -15,8 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.FileFormat; -import org.finos.legend.engine.persistence.components.common.LoadOptions; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; @@ -40,10 +39,7 @@ import java.time.Clock; import java.time.ZoneOffset; import java.time.ZonedDateTime; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_DELETED; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; @@ -102,8 +98,8 @@ 
public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(filesList).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -117,7 +113,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue(TASK_ID_VALUE) + .bulkLoadEventIdValue(TASK_ID_VALUE) .batchIdPattern("{NEXT_BATCH_ID}") .build(); @@ -137,11 +133,11 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT {NEXT_BATCH_ID},'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 
00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"xyz123\"}'))"; + "(SELECT {NEXT_BATCH_ID},'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"xyz123\",\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); @@ -151,7 +147,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test @@ -163,20 +159,21 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoTaskId() .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); + Map loadOptions = new HashMap<>(); + loadOptions.put("encoding", "UTF8"); + loadOptions.put("max_bad_records", 100L); + loadOptions.put("null_marker", "NULL"); + loadOptions.put("quote", "'"); + loadOptions.put("compression", "GZIP"); + loadOptions.put("field_delimiter", ","); + loadOptions.put("skip_leading_rows", 1L); + Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( 
BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .loadOptions(LoadOptions.builder() - .encoding("UTF8") - .maxBadRecords(100L) - .nullMarker("NULL") - .quote("'") - .compression("GZIP") - .fieldDelimiter(",") - .skipLeadingRows(1L) - .build()) - .addAllFiles(filesList).build()) + .fileFormat(FileFormatType.CSV) + .putAllLoadOptions(loadOptions) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -205,16 +202,16 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoTaskId() String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + "FROM FILES " + - "(uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], max_bad_records=100, quote=''', skip_leading_rows=1, format='CSV', encoding='UTF8', compression='GZIP', field_delimiter=',', null_marker='NULL')"; + "(uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], compression='GZIP', encoding='UTF8', field_delimiter=',', format='CSV', max_bad_records=100, null_marker='NULL', quote=''', skip_leading_rows=1)"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "(SELECT 
legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + - "PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + + "PARSE_JSON('{\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); @@ -224,7 +221,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoTaskId() Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `rowsUpdated`", 
statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test @@ -239,8 +236,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(filesList).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -254,7 +251,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue(TASK_ID_VALUE) + .bulkLoadEventIdValue(TASK_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -297,8 +294,8 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(filesList).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -312,7 +309,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) 
.executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue(TASK_ID_VALUE) + .bulkLoadEventIdValue(TASK_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -330,7 +327,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `digest`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -340,7 +337,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `rowsUpdated`", 
statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test @@ -355,8 +352,8 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(filesList).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -370,7 +367,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue(TASK_ID_VALUE) + .bulkLoadEventIdValue(TASK_ID_VALUE) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -389,7 +386,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() String expectedInsertSql = "INSERT INTO `MY_DB`.`MY_NAME` " + "(`COL_INT`, `COL_STRING`, `COL_DECIMAL`, `COL_DATETIME`, `COL_VARIANT`, `DIGEST`, `BATCH_ID`, `APPEND_TIME`) " + - "(SELECT legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE 
UPPER(BULK_LOAD_BATCH_METADATA.`TABLE_NAME`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "(SELECT legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.`TABLE_NAME`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "FROM `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -399,7 +396,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() Assertions.assertEquals("SELECT 0 as `ROWSDELETED`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `ROWSTERMINATED`", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as `ROWSUPDATED`", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as `ROWSINSERTED` FROM `MY_DB`.`MY_NAME` as my_alias WHERE my_alias.`APPEND_TIME` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); + Assertions.assertEquals("SELECT COUNT(*) as `ROWSINSERTED` FROM `MY_DB`.`MY_NAME` as my_alias WHERE my_alias.`APPEND_TIME` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')", statsSql.get(ROWS_INSERTED)); } @Test @@ -462,7 +459,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(BigQuerySink.get()) - .bulkLoadTaskIdValue(TASK_ID_VALUE) + .bulkLoadEventIdValue(TASK_ID_VALUE) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java index adf83659f9f..43fadc60460 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeTest.java @@ -167,9 +167,9 @@ public class IngestModeTest "`BATCH_STATUS` VARCHAR(32)," + "`TABLE_BATCH_ID` INTEGER)"; - protected String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_TIMESTAMP(),'DONE')"; + protected String expectedMetadataTableIngestQuery = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_TIMESTAMP(),'DONE')"; - protected String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO 
BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.`TABLE_NAME` = 'main'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_TIMESTAMP(),'DONE')"; + protected String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE batch_metadata.`TABLE_NAME` = 'main'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_TIMESTAMP(),'DONE')"; String expectedStagingCleanupQuery = "DELETE FROM `mydb`.`staging` as stage"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index 667cb8c5ccb..42494844153 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -14,7 +14,6 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import 
org.finos.legend.engine.persistence.components.AnsiTestArtifacts; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; @@ -94,10 +93,10 @@ public void verifyNontemporalDeltaWithAuditingFilterDupsNoVersioning(GeneratorRe "sink.`amount` = stage.`amount`," + "sink.`biz_date` = stage.`biz_date`," + "sink.`digest` = stage.`digest`," + - "sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHEN NOT MATCHED THEN INSERT " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))"; + "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(mergeSql, milestoningSqlList.get(0)); @@ -164,10 +163,10 @@ public void verifyNonTemporalDeltaWithWithAuditingFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) " + "as stage ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + - "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "THEN UPDATE SET sink.`id` = stage.`id`,sink.`name` = 
stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`,sink.`batch_update_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHEN NOT MATCHED " + "THEN INSERT (`id`, `name`, `amount`, `biz_date`, `digest`, `batch_update_time`) " + - "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))"; + "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, operations.get(0).preActionsSql().get(0)); Assertions.assertEquals(enrichSqlWithDataSplits(mergeSql, dataSplitRanges.get(0)), operations.get(0).ingestSql().get(0)); @@ -279,6 +278,37 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); } + @Override + public void verifyNontemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + + String mergeSql = "MERGE INTO `mydb`.`main` as sink " + + "USING " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage WHERE (stage.`biz_date` > '2020-01-10') OR ((stage.`biz_date` > '2020-01-01') AND (stage.`biz_date` < '2020-01-05'))) as stage " + + "ON (sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`) " + + "WHEN MATCHED AND sink.`digest` <> stage.`digest` " + + "THEN UPDATE SET " + + "sink.`id` = stage.`id`," + + "sink.`name` = stage.`name`," + + "sink.`amount` = stage.`amount`," + + "sink.`biz_date` = stage.`biz_date`," + + "sink.`digest` = stage.`digest` " + + "WHEN NOT MATCHED THEN " + + 
"INSERT (`id`, `name`, `amount`, `biz_date`, `digest`) " + + "VALUES (stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`)"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(mergeSql, milestoningSqlList.get(0)); + + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE (stage.`biz_date` > '2020-01-10') OR ((stage.`biz_date` > '2020-01-01') AND (stage.`biz_date` < '2020-01-05'))"; + // Stats + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + @Override public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java index 1636e9de80d..c04a95e96de 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalSnapshotTest.java @@ -58,7 +58,7 @@ public void verifyNontemporalSnapshotWithAuditingFilterDupsNoVersioning(Generato String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); @@ -79,7 +79,7 @@ public void verifyNontemporalSnapshotWithAuditingFailOnDupMaxVersion(GeneratorRe String insertSql = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `batch_update_time`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') FROM " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') FROM " + "`mydb`.`staging_legend_persistence_temp_staging` as stage)"; Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTableWithAuditPKCreateQuery, preActionsSqlList.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 7290a5e44eb..21da3c27a14 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -23,6 +23,8 @@ import java.util.List; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; + public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @Override @@ -316,6 +318,35 @@ public void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult Assertions.assertEquals(getExpectedMetadataTableIngestWithStagingFiltersQuery(), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 WHERE (sink.`batch_id_out` = 999999999) AND " + + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE (((sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`)) AND 
(stage.`batch_id_in` > 5)))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging` as stage " + + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " + + "AND (sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) " + + "AND (stage.`batch_id_in` > 5))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations) { @@ -338,12 +369,74 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + + 
"(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + + "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_rank` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "stage.`version`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "WHERE stage.`batch_id_in` > 5 GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, " + + "stage.`digest`, stage.`version`) as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestWithStagingFiltersQuery(), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + + "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` " + + "as stage WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " + + "AND (stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + 
"stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + + "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_rank` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "stage.`version`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "WHERE stage.`batch_id_in` > 5 GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, " + + "stage.`digest`, stage.`version`) as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 9cc6944a77d..9ae66a9d31d 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -35,7 +35,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult op String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -45,7 +45,7 @@ public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersion(GeneratorResult op "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE 
NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -72,7 +72,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -82,7 +82,7 @@ public void verifyUnitemporalDeltaNoDeleteIndFilterDupsAllVersionWithoutPerform( "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + @@ 
-119,7 +119,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDedupNoVersion(Gener String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -131,7 +131,7 @@ public void verifyUnitemporalDeltaWithDeleteIndMultiValuesNoDedupNoVersion(Gener "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -162,7 +162,7 @@ public void verifyUnitemporalDeltaWithDeleteInd(GeneratorResult operations) String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d 
%H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE " + "(sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + @@ -174,7 +174,7 @@ public void verifyUnitemporalDeltaWithDeleteInd(GeneratorResult operations) "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + @@ -193,7 +193,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + @@ -203,7 +203,7 @@ public void verifyUnitemporalDeltaWithDeleteIndFailOnDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + "(sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND " + @@ -236,8 +236,8 @@ public void verifyUnitemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String 
expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT 
stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); @@ -254,7 +254,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE " + "((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`)))"; @@ -263,7 +263,7 @@ public void verifyUnitemporalDeltaWithLessColumnsInStaging(GeneratorResult opera "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as 
batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) AND (sink.`digest` = stage.`digest`) " + @@ -285,7 +285,7 @@ public void verifyUnitemporalDeltaWithPlaceholders(GeneratorResult operations) String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + "SET sink.`batch_id_out` = {BATCH_ID_PATTERN}-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TS_PATTERN}') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_START_TS_PATTERN}') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -294,7 +294,7 @@ public void verifyUnitemporalDeltaWithPlaceholders(GeneratorResult operations) String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "{BATCH_ID_PATTERN},999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TS_PATTERN}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "{BATCH_ID_PATTERN},999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_START_TS_PATTERN}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -329,7 +329,7 @@ public void 
verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) String expectedMilestoneQuery = "UPDATE `my_schema`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `my_schema`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -339,7 +339,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `my_schema`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `my_schema`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -374,7 +374,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati String expectedMilestoneQuery = "UPDATE `mydb`.`my_schema`.`main` as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d 
%H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM `mydb`.`my_schema`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -384,7 +384,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`my_schema`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`my_schema`.`main` as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + @@ -419,7 +419,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper String expectedMilestoneQuery = "UPDATE main as sink " + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1," + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) AND " + "(EXISTS (SELECT * FROM staging as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + @@ -429,7 +429,7 @@ public void 
verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')," + - "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM staging as stage " + "WHERE NOT (EXISTS (SELECT * FROM main as sink " + "WHERE (sink.`batch_id_out` = 999999999) " + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java index cb6962a0515..3062b2d59ea 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaDateTimeBasedTest.java @@ -33,8 +33,8 @@ public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(GeneratorResult List metadataIngestSql = 
operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -42,10 +42,10 @@ public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(GeneratorResult String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " + "AND (sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); @@ -57,9 +57,9 @@ public void verifyUnitemporalDeltaNoDeleteIndNoDedupNoVersioning(GeneratorResult // Stats String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String 
rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; String rowsTerminated = "SELECT 0 as `rowsTerminated`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -69,8 +69,8 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= 
'{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND " + "(sink.`digest` <> stage.`digest`)))"; @@ -78,11 +78,11 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " + "AND (sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, operations.get(0).preActionsSql().get(0)); @@ -98,9 +98,9 @@ public void verifyUnitemporalDeltaNoDeleteIndFailOnDupsAllVersionWithoutPerform( // Stats String incomingRecordCount = "SELECT COALESCE(SUM(stage.`legend_persistence_count`),0) as `incomingRecordCount` FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE (stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = 
PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) as `rowsInserted`"; String rowsTerminated = "SELECT 0 as `rowsTerminated`"; verifyStats(operations.get(0), enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -113,9 +113,9 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupNoVersioning(GeneratorResu List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE " + - "(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + + "(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR 
(stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -124,9 +124,9 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupNoVersioning(GeneratorResu "(`id`, `name`, `amount`, `biz_date`, `digest`, " + "`batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + "(stage.`delete_indicator` NOT IN ('yes','1','true')))"; @@ -139,10 +139,10 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupNoVersioning(GeneratorResu // Stats String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) 
AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))))"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as `rowsInserted`"; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as `rowsTerminated`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))))) as `rowsInserted`"; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 
00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))))) as `rowsTerminated`"; verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -150,9 +150,9 @@ public void verifyUnitemporalDeltaWithDeleteIndNoDedupNoVersioning(GeneratorResu public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List operations, List dataSplitRanges) { String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE " + - "(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + + "(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((stage.`data_split` >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) " + "AND ((sink.`digest` <> stage.`digest`) OR (stage.`delete_indicator` IN ('yes','1','true')))))"; @@ -161,10 +161,10 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.`data_split` <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " + + "WHERE 
(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND (sink.`digest` = stage.`digest`) " + "AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) AND " + "(stage.`delete_indicator` NOT IN ('yes','1','true')))"; @@ -181,10 +181,10 @@ public void verifyUnitemporalDeltaWithDeleteIndFilterDupsAllVersion(List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; + String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` <> stage.`DIGEST`)))"; - String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; + String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` (`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, 
`DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) (SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage WHERE NOT (EXISTS (SELECT * FROM `MYDB`.`MAIN` as sink WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND (sink.`DIGEST` = stage.`DIGEST`) AND ((sink.`ID` = stage.`ID`) AND (sink.`NAME` = stage.`NAME`)))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 6e709c287b3..447d4cb9926 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -40,7 +40,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning(Generat List metadataIngestSql = operations.metadataIngestSql(); String 
expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -48,7 +48,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersioning(Generat String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; @@ -72,7 +72,7 @@ public void 
verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -80,7 +80,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM 
`mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; @@ -89,7 +89,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Assertions.assertEquals(BigQueryTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); @@ -105,7 +105,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBa List milestoningSql = operations.ingestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE sink.`batch_id_out` = 999999999"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); @@ -124,14 +124,14 @@ public void 
verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizerFilte String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink " + "SET sink.`BATCH_ID_OUT` = (SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA " + - "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN')-1,sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`BATCH_ID_OUT` = 999999999) AND (NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_ID_IN`, `BATCH_ID_OUT`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`,(SELECT COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 " + "FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),999999999," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink WHERE sink.`BATCH_ID_OUT` = 999999999)))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQueryWithUpperCase, preActionsSql.get(0)); @@ -154,7 +154,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersioning(GeneratorR List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE 
`mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -163,7 +163,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersioning(GeneratorR String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND (sink.`biz_date` = stage.`biz_date`))))"; @@ -197,7 +197,7 @@ public void 
verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning(Gen List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + @@ -206,7 +206,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersioning(Gen String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM 
`mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; @@ -226,7 +226,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmp List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; @@ -252,7 +252,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = 
stage.`digest`))))"; @@ -260,7 +260,7 @@ public void verifyUnitemporalSnapshotWithLessColumnsInStaging(GeneratorResult op String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`digest`," + - "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; @@ -280,7 +280,7 @@ public void verifyUnitemporalSnapshotWithPlaceholders(GeneratorResult operations List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = {BATCH_ID_PATTERN}-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TS_PATTERN}') " + + "SET sink.`batch_id_out` = {BATCH_ID_PATTERN}-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_START_TS_PATTERN}') " + "WHERE (sink.`batch_id_out` = 999999999) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -288,7 +288,7 @@ public void verifyUnitemporalSnapshotWithPlaceholders(GeneratorResult operations String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, 
`amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "{BATCH_ID_PATTERN},999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TS_PATTERN}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "{BATCH_ID_PATTERN},999999999,PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_START_TS_PATTERN}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_id_out` = 999999999)))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index 8e748b0bb41..e51aa4cdfe3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -28,10 +28,10 @@ public class UnitemporalSnapshotDateTimeBasedTest extends UnitmemporalSnapshotDa { String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage"; - String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE 
(sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))"; + String rowsUpdated = "SELECT COUNT(*) as `rowsUpdated` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))))"; String rowsDeleted = "SELECT 0 as `rowsDeleted`"; - String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as `rowsInserted`"; - String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'))))) as `rowsTerminated`"; + String rowsInserted = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d 
%H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))))) as `rowsInserted`"; + String rowsTerminated = "SELECT (SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))-(SELECT COUNT(*) FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000')) AND (EXISTS (SELECT * FROM `mydb`.`main` as sink2 WHERE ((sink2.`id` = sink.`id`) AND (sink2.`name` = sink.`name`)) AND (sink2.`batch_time_in` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'))))) as `rowsTerminated`"; @Override public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorResult operations) @@ -41,8 +41,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorR List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -50,9 +50,9 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupNoVersion(GeneratorR String 
expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + - "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))))"; + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59'))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); @@ -73,8 +73,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Map deduplicationAndVersioningErrorChecksSql = operations.deduplicationAndVersioningErrorChecksSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + "WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`))))"; @@ -82,9 +82,9 @@ public void 
verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + - "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))))"; + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59'))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); @@ -98,7 +98,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(BigQueryTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); Assertions.assertEquals(BigQueryTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(BigQueryTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); } @Override @@ -109,8 +109,8 @@ public 
void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandli List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')"; + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); @@ -127,17 +127,17 @@ public void verifyUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer(Gene List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `MYDB`.`MAIN` as sink SET " + - "sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + + "sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(NOT (EXISTS (SELECT * FROM `MYDB`.`STAGING` as stage WHERE ((sink.`ID` = stage.`ID`) " + "AND (sink.`NAME` = stage.`NAME`)) AND (sink.`DIGEST` = stage.`DIGEST`))))"; String expectedUpsertQuery = "INSERT INTO `MYDB`.`MAIN` " + "(`ID`, `NAME`, `AMOUNT`, `BIZ_DATE`, `DIGEST`, `BATCH_TIME_IN`, `BATCH_TIME_OUT`) " + "(SELECT stage.`ID`,stage.`NAME`,stage.`AMOUNT`,stage.`BIZ_DATE`,stage.`DIGEST`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage " + + 
"PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM `MYDB`.`STAGING` as stage " + "WHERE NOT (stage.`DIGEST` IN (SELECT sink.`DIGEST` FROM `MYDB`.`MAIN` as sink " + - "WHERE sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59'))))"; + "WHERE sink.`BATCH_TIME_OUT` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59'))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQueryWithUpperCase, preActionsSql.get(1)); @@ -155,8 +155,8 @@ public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResu List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) " + + "SET sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) " + "AND (NOT (EXISTS " + "(SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) " + "AND (EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE sink.`biz_date` = stage.`biz_date`))"; @@ -164,9 +164,9 @@ public void verifyUnitemporalSnapshotWithPartitionNoDedupNoVersion(GeneratorResu String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d 
%H:%M:%S','9999-12-31 23:59:59') " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') " + "FROM `mydb`.`staging` as stage " + - "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND (sink.`biz_date` = stage.`biz_date`))))"; + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND (sink.`biz_date` = stage.`biz_date`))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); @@ -185,8 +185,8 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(Genera List metadataIngestSql = operations.metadataIngestSql(); String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET " + - "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000') " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + + "sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000') " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(NOT (EXISTS (SELECT * FROM `mydb`.`staging` as stage WHERE ((sink.`id` = stage.`id`) AND " + "(sink.`name` = stage.`name`)) AND (sink.`digest` = stage.`digest`)))) AND " + "(sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; @@ -194,9 +194,9 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDedupNoVersion(Genera String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_time_in`, `batch_time_out`) " + "(SELECT 
stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + + "PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59') FROM `mydb`.`staging` as stage " + "WHERE NOT (stage.`digest` IN (SELECT sink.`digest` FROM `mydb`.`main` as sink " + - "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','9999-12-31 23:59:59')) AND " + + "WHERE (sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','9999-12-31 23:59:59')) AND " + "(sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00')))))"; Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableTimeBasedCreateQuery, preActionsSql.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/transformer/PlaceholderTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/transformer/PlaceholderTest.java index d486d75f3cf..d19fd8c7eff 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/transformer/PlaceholderTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/transformer/PlaceholderTest.java @@ -42,7 +42,7 @@ void testTimestampPlaceholder() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String 
expectedSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TIMESTAMP_PLACEHOLDER}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_END_TIMESTAMP_PLACEHOLDER}'),'DONE')"; + String expectedSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_START_TIMESTAMP_PLACEHOLDER}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_END_TIMESTAMP_PLACEHOLDER}'),'DONE')"; Assertions.assertEquals(expectedSql, list.get(0)); } @@ -56,7 +56,7 @@ void testTimestampPlaceholderWithUpperCase() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE UPPER(batch_metadata.`TABLE_NAME`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TIMESTAMP_PLACEHOLDER}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_END_TIMESTAMP_PLACEHOLDER}'),'DONE')"; + String expectedSql = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`) (SELECT 'main',(SELECT COALESCE(MAX(batch_metadata.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as batch_metadata WHERE UPPER(batch_metadata.`TABLE_NAME`) = 'MAIN'),PARSE_DATETIME('%Y-%m-%d 
%H:%M:%E6S','{BATCH_START_TIMESTAMP_PLACEHOLDER}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_END_TIMESTAMP_PLACEHOLDER}'),'DONE')"; Assertions.assertEquals(expectedSql, list.get(0)); } @@ -70,7 +70,7 @@ void testBatchIdAndTimestampPlaceholder() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',{BATCH_ID_PATTERN},PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_START_TIMESTAMP_PLACEHOLDER}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','{BATCH_END_TIMESTAMP_PLACEHOLDER}'),'DONE')"; + String expectedSql = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) (SELECT 'main',{BATCH_ID_PATTERN},PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_START_TIMESTAMP_PLACEHOLDER}'),PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','{BATCH_END_TIMESTAMP_PLACEHOLDER}'),'DONE')"; Assertions.assertEquals(expectedSql, list.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java index 8d7be47cd26..b1fa9686c99 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java @@ -24,14 +24,14 @@ public String getExpectedSqlForMetadata() { return "INSERT INTO bulk_load_batch_metadata " + "(`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA " + "(`BATCH_ID`, `TABLE_NAME`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`, `BATCH_SOURCE_INFO`) " + - "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`TABLE_NAME`) = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`TABLE_NAME`) = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d 
%H:%M:%E6S','2000-01-01 00:00:00.000000'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass1.csv new file mode 100644 index 00000000000..73a2f688718 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass1.csv @@ -0,0 +1,3 @@ +ANDY,3000,2022-12-03,2023-01-01 00:00:00,a11b7277f3c549f7a172efaba6170531 +HARRY,1000,2022-12-01,2023-01-01 00:00:00,181e95e5d4f3cb2861d4332351efc102 +ROBERT,2000,2022-12-02,2023-01-01 00:00:00,61e0cd6d24d93a03495c7e1557bb8ac7 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass2.csv new file mode 100644 index 00000000000..d268f49447d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass2.csv @@ -0,0 +1,6 @@ +ANDY,3000,2022-12-03,2023-01-01 00:00:00,a11b7277f3c549f7a172efaba6170531 +ANDY,3100,2022-12-03,2023-01-02 00:00:00,a6d40d87fec7feb2e1419d88a6210e30 +HARRY,1000,2022-12-01,2023-01-01 
00:00:00,181e95e5d4f3cb2861d4332351efc102 +MATT,4000,2022-12-06,2023-01-02 00:00:00,cb908dd9a1d284dca4e93a116a0a34e6 +ROBERT,2000,2022-12-02,2023-01-01 00:00:00,61e0cd6d24d93a03495c7e1557bb8ac7 +ROBERT,2000,2022-12-02,2023-01-02 00:00:00,5e9487021d62fe0c1036dee80bd35288 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass3.csv new file mode 100644 index 00000000000..d17cefca258 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass3.csv @@ -0,0 +1,3 @@ +3,ANDY,3000,2022-12-03,2023-01-01 00:00:00,ab516a99f05bc4c9df9524b24e2e79fe,2000-01-01T00:00:00 +1,HARRY,1000,2022-12-01,2023-01-01 00:00:00,ca6964c25446026f294fca2cf80e9781,2000-01-01T00:00:00 +2,ROBERT,2000,2022-12-02,2023-01-01 00:00:00,f9ea83fc52c7548d74f9b7cb6d49313f,2000-01-01T00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass4.csv new file mode 100644 index 00000000000..0478d871307 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/append/digest_generation/data_pass4.csv @@ -0,0 +1,6 
@@ +3,ANDY,3000,2022-12-03,2023-01-01 00:00:00,ab516a99f05bc4c9df9524b24e2e79fe,2000-01-01T00:00:00 +3,ANDY,3100,2022-12-03,2023-01-02 00:00:00,9ad9d80e387475658c0844cc90f8d284,2000-01-02T00:00:00 +1,HARRY,1000,2022-12-01,2023-01-01 00:00:00,ca6964c25446026f294fca2cf80e9781,2000-01-01T00:00:00 +4,MATT,4000,2022-12-06,2023-01-02 00:00:00,6740df64335a00377bbf4ca44fffff72,2000-01-02T00:00:00 +2,ROBERT,2000,2022-12-02,2023-01-01 00:00:00,f9ea83fc52c7548d74f9b7cb6d49313f,2000-01-01T00:00:00 +2,ROBERT,2000,2022-12-02,2023-01-02 00:00:00,c32a1dd4f2d691726bd52a32d4211994,2000-01-02T00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass1.csv new file mode 100644 index 00000000000..37e8837bd3e --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass1.csv @@ -0,0 +1,3 @@ +HARRY,1000,2022-12-01,2023-01-01 00:00:00 +ROBERT,2000,2022-12-02,2023-01-01 00:00:00 +ANDY,3000,2022-12-03,2023-01-01 00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass2.csv new file mode 100644 index 00000000000..8252b89a02a --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass2.csv @@ -0,0 +1,3 @@ +ROBERT,2000,2022-12-02,2023-01-02 00:00:00 +ANDY,3100,2022-12-03,2023-01-02 00:00:00 +MATT,4000,2022-12-06,2023-01-02 00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass3.csv new file mode 100644 index 00000000000..9f0c612f79b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass3.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2022-12-01,2023-01-01 00:00:00 +2,ROBERT,2000,2022-12-02,2023-01-01 00:00:00 +3,ANDY,3000,2022-12-03,2023-01-01 00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass4.csv new file mode 100644 index 00000000000..e6f15f85ff6 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/digest_generation/data_pass4.csv @@ -0,0 +1,3 @@ +2,ROBERT,2000,2022-12-02,2023-01-02 00:00:00 +3,ANDY,3100,2022-12-03,2023-01-02 00:00:00 +4,MATT,4000,2022-12-06,2023-01-02 00:00:00 \ No newline at end 
of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index 2858ce87f80..afb9547d356 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -114,7 +114,7 @@ public boolean enableConcurrentSafety() public abstract Optional infiniteBatchIdValue(); - public abstract Optional bulkLoadTaskIdValue(); + public abstract Optional bulkLoadEventIdValue(); @Default public String bulkLoadBatchStatusPattern() @@ -137,7 +137,7 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) - .bulkLoadTaskIdValue(bulkLoadTaskIdValue()) + .bulkLoadEventIdValue(bulkLoadEventIdValue()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 12faa47d745..7bf2618518f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -140,7 +140,7 @@ public Set schemaEvolutionCapabilitySet() public abstract RelationalSink relationalSink(); - public abstract Optional bulkLoadTaskIdValue(); + public abstract Optional bulkLoadEventIdValue(); @Derived protected PlannerOptions plannerOptions() @@ -151,7 +151,7 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) - .bulkLoadTaskIdValue(bulkLoadTaskIdValue()) + .bulkLoadEventIdValue(bulkLoadEventIdValue()) .build(); } @@ -512,7 +512,7 @@ private void init(Datasets datasets) .batchStartTimestampPattern(BATCH_START_TS_PATTERN) .batchEndTimestampPattern(BATCH_END_TS_PATTERN) .batchIdPattern(BATCH_ID_PATTERN) - .bulkLoadTaskIdValue(bulkLoadTaskIdValue()) + .bulkLoadEventIdValue(bulkLoadEventIdValue()) .build(); planner = Planners.get(enrichedDatasets, enrichedIngestMode, plannerOptions(), relationalSink().capabilities()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java index 52423165eab..993c1825a75 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java @@ -48,7 +48,8 @@ public enum FunctionName PARSE_JSON("PARSE_JSON"), TO_VARIANT("TO_VARIANT"), OBJECT_CONSTRUCT("OBJECT_CONSTRUCT"), - TO_JSON("TO_JSON"); + TO_JSON("TO_JSON"), + CONVERT("CONVERT"); private static final Map BY_NAME = Arrays .stream(FunctionName.values()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/DeleteStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/DeleteStatement.java index e85e00c7f95..9a2b3540898 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/DeleteStatement.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/DeleteStatement.java @@ -37,9 +37,9 @@ public DeleteStatement(Table table, Condition condition) } /* - DELETE GENERIC PLAN: - DELETE FROM table-Name [[AS] correlation-Name] [WHERE clause] - */ + DELETE GENERIC PLAN: + DELETE FROM table-Name [[AS] correlation-Name] [WHERE clause] + */ @Override public void genSql(StringBuilder builder) throws SqlDomException { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/ShowCommand.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/ShowCommand.java index af84a7fe9eb..6b7939461ff 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/ShowCommand.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/statements/ShowCommand.java @@ -18,33 +18,33 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; import org.finos.legend.engine.persistence.components.relational.sqldom.common.ShowType; +import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; + +import 
java.util.Optional; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; public class ShowCommand implements SqlGen { private final ShowType operation; - private String schemaName; + private Optional schemaName; + private final String quoteIdentifier; - public ShowCommand(ShowType operation) - { - this(operation, null); - } - - public ShowCommand(ShowType operation, String schemaName) + public ShowCommand(ShowType operation, Optional schemaName, String quoteIdentifier) { this.operation = operation; this.schemaName = schemaName; + this.quoteIdentifier = quoteIdentifier; } - public String getSchemaName() + public Optional getSchemaName() { return schemaName; } public void setSchemaName(String schemaName) { - this.schemaName = schemaName; + this.schemaName = Optional.of(schemaName); } /* @@ -60,10 +60,10 @@ public void genSql(StringBuilder builder) throws SqlDomException { builder.append(Clause.SHOW.get()); builder.append(WHITE_SPACE + operation.name()); - if (operation == ShowType.TABLES && schemaName != null) + if (operation == ShowType.TABLES && schemaName.isPresent() && !schemaName.get().isEmpty()) { builder.append(WHITE_SPACE + Clause.FROM.get()); - builder.append(WHITE_SPACE + schemaName); + builder.append(WHITE_SPACE + SqlGenUtils.getQuotedField(schemaName.get(), quoteIdentifier)); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ShowCommandTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ShowCommandTest.java index 8c69f0d9b5b..879a9ee2b91 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ShowCommandTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/test/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/ShowCommandTest.java @@ -19,6 +19,8 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.ShowCommand; import org.junit.jupiter.api.Test; +import java.util.Optional; + import static org.junit.jupiter.api.Assertions.assertEquals; public class ShowCommandTest @@ -28,9 +30,9 @@ public class ShowCommandTest public void testShowCommand() { Table table = new Table("CITIBIKE", "public", "trips", null, BaseTest.QUOTE_IDENTIFIER); - ShowCommand command = new ShowCommand(ShowType.TABLES, table.getSchema()); + ShowCommand command = new ShowCommand(ShowType.TABLES, Optional.of(table.getSchema()), BaseTest.QUOTE_IDENTIFIER); String sql = BaseTest.genSqlIgnoringErrors(command); - String expected = "SHOW TABLES FROM public"; + String expected = "SHOW TABLES FROM \"public\""; assertEquals(expected, sql); } @@ -38,7 +40,7 @@ public void testShowCommand() public void testShowCommandWithoutSchema() { Table table = new Table("CITIBIKE", null, "trips", null, BaseTest.QUOTE_IDENTIFIER); - ShowCommand command = new ShowCommand(ShowType.TABLES); + ShowCommand command = new ShowCommand(ShowType.TABLES, Optional.empty(), BaseTest.QUOTE_IDENTIFIER); String sql = BaseTest.genSqlIgnoringErrors(command); String expected = "SHOW TABLES"; assertEquals(expected, sql); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java index ea69a121e66..f067057cd10 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets; -import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; @@ -30,16 +30,20 @@ ) public interface H2StagedFilesDatasetPropertiesAbstract extends StagedFilesDatasetProperties { - FileFormat fileFormat(); + FileFormatType fileFormat(); @Value.Check default void validate() { - if (files().size() != 1) + if (filePatterns().size() > 0) + { + throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, filePatterns not supported"); + } + if (filePaths().size() != 1) { throw new IllegalArgumentException("Cannot build 
H2StagedFilesDatasetProperties, only 1 file per load supported"); } - if (fileFormat() != FileFormat.CSV) + if (fileFormat() != FileFormatType.CSV) { throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only CSV file loading supported"); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java index f40354b288d..46690aae586 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java @@ -14,7 +14,13 @@ package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; +import org.finos.legend.engine.persistence.components.logicalplan.values.ObjectValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; import 
org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; @@ -40,7 +46,18 @@ public VisitorResult visit(PhysicalPlanNode prev, DigestUdf current, VisitorCont for (int i = 0; i < current.values().size(); i++) { columnNameList.add(StringValue.of(current.fieldNames().get(i))); - columnValueList.add(current.values().get(i)); + + if (current.values().get(i) instanceof StagedFilesFieldValue) + { + // The field, being StagedFilesFieldValue, is already a String + StagedFilesFieldValue stagedFilesField = (StagedFilesFieldValue) current.values().get(i); + columnValueList.add(FieldValue.builder().fieldName(stagedFilesField.fieldName()).build()); + } + else + { + // Else need to convert the field into a String + columnValueList.add(FunctionImpl.builder().functionName(FunctionName.CONVERT).addValue(current.values().get(i), ObjectValue.of(DataType.VARCHAR.name())).build()); + } } ToArrayFunction columnNames = ToArrayFunction.builder().addAllValues(columnNameList).build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java index b697e4140ea..8f84482ee90 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -32,7 +32,7 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu throw new IllegalStateException("Only H2StagedFilesDatasetProperties are supported for H2 Sink"); } H2StagedFilesDatasetProperties datasetProperties = (H2StagedFilesDatasetProperties) current.properties(); - CsvRead csvRead = new CsvRead(datasetProperties.files().get(0), String.join(",", current.columns()), null); + CsvRead csvRead = new CsvRead(datasetProperties.filePaths().get(0), String.join(",", current.columns()), null); prev.push(csvRead); return new VisitorResult(null); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java index 1b993221bc6..7e46fce9d1e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java @@ -15,9 +15,6 @@ package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; import 
org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; -import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; @@ -49,17 +46,7 @@ public VisitorResult visit(PhysicalPlanNode prev, ToArrayFunction current, Visit if (current.values() != null) { List logicalPlanNodeList = new ArrayList<>(); - for (Value value : current.values()) - { - if (value instanceof StagedFilesFieldValue) - { - logicalPlanNodeList.add(FieldValue.builder().fieldName(((StagedFilesFieldValue) value).fieldName()).build()); - } - else - { - logicalPlanNodeList.add(value); - } - } + logicalPlanNodeList.addAll(current.values()); return new VisitorResult(function, logicalPlanNodeList); } return new VisitorResult(null); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index 0341dd2620d..699402557b7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -449,6 +449,16 @@ protected void loadStagingDataWithVersionInUpperCase(String path) throws Excepti h2Sink.executeStatement(loadSql); } + protected void loadStagingDataWithVersionWithoutDigest(String path) throws Exception + { + validateFileExists(path); + String loadSql = "TRUNCATE TABLE \"TEST\".\"staging\";" + + "INSERT INTO \"TEST\".\"staging\"(id, name, income, start_time ,expiry_date, version) " + + "SELECT CONVERT( \"id\",INT ), \"name\", CONVERT( \"income\", BIGINT), CONVERT( \"start_time\", DATETIME), CONVERT( \"expiry_date\", DATE), CONVERT( \"version\",INT)" + + " FROM CSVREAD( '" + path + "', 'id, name, income, start_time, expiry_date, version', NULL )"; + h2Sink.executeStatement(loadSql); + } + protected void loadStagingDataWithFilter(String path) throws Exception { validateFileExists(path); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java index 1ed5b1b8fd7..02bd2c12e9a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/TestUtils.java @@ -17,6 +17,13 @@ import com.opencsv.CSVReader; import org.finos.legend.engine.persistence.components.common.DatasetFilter; 
import org.finos.legend.engine.persistence.components.common.FilterType; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThanEqualTo; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.LessThan; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.LessThanEqualTo; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Or; import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; @@ -24,9 +31,13 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.JsonExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.executor.RelationalExecutionHelper; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.NumericalValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import 
org.junit.jupiter.api.Assertions; @@ -85,6 +96,7 @@ public class TestUtils // Special columns public static String digestName = "digest"; + public static String digestUDF = "LAKEHOUSE_MD5"; public static String versionName = "version"; public static String batchUpdateTimeName = "batch_update_time"; public static String batchIdInName = "batch_id_in"; @@ -266,6 +278,18 @@ public static SchemaDefinition getStagingSchemaWithNonPkVersion() .build(); } + public static SchemaDefinition getStagingSchemaWithNonPkVersionWithoutDigest() + { + return SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(income) + .addFields(startTime) + .addFields(expiryDate) + .addFields(version) + .build(); + } + public static SchemaDefinition getStagingSchemaWithFilterForDB() { return SchemaDefinition.builder() @@ -355,6 +379,23 @@ public static DatasetDefinition getStagingTableWithNoPks() .build(); } + public static FilteredDataset getFilteredStagingTableWithComplexFilter() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getSchemaWithNoPKs()) + .alias(stagingTableName) + .filter(And.builder() + .addConditions(GreaterThan.of(FieldValue.builder().fieldName(incomeName).datasetRefAlias(stagingTableName).build(), NumericalValue.of(1000L))) + .addConditions(Or.builder() + .addConditions(GreaterThanEqualTo.of(FieldValue.builder().fieldName(expiryDateName).datasetRefAlias(stagingTableName).build(), StringValue.of("2022-12-03"))) + .addConditions(LessThanEqualTo.of(FieldValue.builder().fieldName(expiryDateName).datasetRefAlias(stagingTableName).build(), StringValue.of("2022-12-01"))) + .build()) + .build()) + .build(); + } + public static DatasetDefinition getBasicStagingTableWithExpiryDatePk() { return DatasetDefinition.builder() @@ -382,6 +423,15 @@ public static DatasetDefinition getStagingTableWithNonPkVersion() .build(); } + public static DatasetDefinition getStagingTableWithNonPkVersionWithoutDigest() + { + return 
DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getStagingSchemaWithNonPkVersionWithoutDigest()) + .build(); + } + public static DatasetDefinition getStagingTableWithFilterForDB() { return DatasetDefinition.builder() @@ -411,6 +461,20 @@ public static DerivedDataset getDerivedStagingTableWithFilter() .build(); } + public static FilteredDataset getFilteredStagingTable() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getStagingSchema()) + .alias(stagingTableName) + .filter(Equals.of(FieldValue.builder() + .fieldName(batchName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(1L))) + .build(); + } + public static DerivedDataset getStagingTableWithFilterSecondPass() { return DerivedDataset.builder() @@ -423,6 +487,20 @@ public static DerivedDataset getStagingTableWithFilterSecondPass() .build(); } + public static FilteredDataset getFilteredStagingTableSecondPass() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getStagingSchema()) + .alias(stagingTableName) + .filter(GreaterThan.of(FieldValue.builder() + .fieldName(batchName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(1L))) + .build(); + } + public static DerivedDataset getDerivedStagingTableWithFilterWithVersion() { return DerivedDataset.builder() @@ -434,6 +512,20 @@ public static DerivedDataset getDerivedStagingTableWithFilterWithVersion() .build(); } + public static FilteredDataset getFilteredStagingTableWithVersion() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getStagingSchemaWithVersion()) + .alias(stagingTableName) + .filter(GreaterThanEqualTo.of(FieldValue.builder() + .fieldName(batchName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(2L))) + .build(); + } + public static DerivedDataset getStagingTableWithFilterWithVersionSecondPass() { return 
DerivedDataset.builder() @@ -445,6 +537,20 @@ public static DerivedDataset getStagingTableWithFilterWithVersionSecondPass() .build(); } + public static FilteredDataset getFilteredStagingTableWithVersionSecondPass() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(getStagingSchemaWithVersion()) + .alias(stagingTableName) + .filter(GreaterThanEqualTo.of(FieldValue.builder() + .fieldName(batchName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(3L))) + .build(); + } + public static JsonExternalDatasetReference getBasicJsonDatasetReferenceTable(String dataPath) { return JsonExternalDatasetReference.builder() @@ -719,6 +825,50 @@ public static DatasetDefinition getEntityPriceWithVersionStagingTable() .build(); } + public static FilteredDataset getEntityPriceWithVersionFilteredStagingTable() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .alias(stagingTableName) + .schema(SchemaDefinition.builder() + .addFields(date) + .addFields(entity) + .addFields(price) + .addFields(volume) + .addFields(digest) + .addFields(version) + .build() + ) + .filter(GreaterThanEqualTo.of(FieldValue.builder() + .fieldName(volumeName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(100L))) + .build(); + } + + public static FilteredDataset getEntityPriceWithVersionFilteredStagingTableSecondPass() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .alias(stagingTableName) + .schema(SchemaDefinition.builder() + .addFields(date) + .addFields(entity) + .addFields(price) + .addFields(volume) + .addFields(digest) + .addFields(version) + .build() + ) + .filter(GreaterThanEqualTo.of(FieldValue.builder() + .fieldName(volumeName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(500L))) + .build(); + } + public static DatasetDefinition getBitemporalMainTable() { return DatasetDefinition.builder() @@ -928,6 +1078,46 
@@ public static DatasetDefinition getBitemporalFromOnlyStagingTableIdBased() .build(); } + public static FilteredDataset getBitemporalFromOnlyFilteredStagingTableIdBased() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(SchemaDefinition.builder() + .addFields(index) + .addFields(dateTime) + .addFields(balance) + .addFields(digest) + .build() + ) + .alias(stagingTableName) + .filter(LessThanEqualTo.of(FieldValue.builder() + .fieldName(balanceName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(3L))) + .build(); + } + + public static FilteredDataset getBitemporalFromOnlyFilteredStagingTableIdBasedSecondPass() + { + return FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(SchemaDefinition.builder() + .addFields(index) + .addFields(dateTime) + .addFields(balance) + .addFields(digest) + .build() + ) + .alias(stagingTableName) + .filter(LessThanEqualTo.of(FieldValue.builder() + .fieldName(balanceName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(20L))) + .build(); + } + public static DatasetDefinition getBitemporalFromOnlyStagingTableWithoutDuplicatesIdBased() { return DatasetDefinition.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java index f45582ddb34..fa339e39789 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaWithBatchIdTest.java @@ -27,6 +27,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; import org.junit.jupiter.api.Assertions; @@ -849,6 +850,59 @@ void testMilestoningSourceSpecifiesFromSet5WithDataSplitFilterDuplicates() throw executePlansAndVerifyResultsWithSpecifiedDataSplits(ingestMode, options, datasets, schema, expectedDataPass6, expectedStats, dataSplitRanges); } + /* + Scenario: Test milestoning Logic with only validity from time specified when staging table pre populated + */ + @Test + void testMilestoningSourceSpecifiesFromSet6WithStagingFilter() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + FilteredDataset stagingTable = TestUtils.getBitemporalFromOnlyFilteredStagingTableIdBased(); + + String[] schema = new String[] {indexName, balanceName, digestName, startDateTimeName, endDateTimeName, batchIdInName, batchIdOutName}; + + // Create staging table + createStagingTable(TestUtils.getBitemporalFromOnlyStagingTableIdBased()); + + BitemporalDelta ingestMode = 
BitemporalDelta.builder() + .digestField(digestName) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .build()) + .validityMilestoning(ValidDateTime.builder() + .dateTimeFromName(startDateTimeName) + .dateTimeThruName(endDateTimeName) + .validityDerivation(SourceSpecifiesFromDateTime.builder() + .sourceDateTimeFromField(dateTimeName) + .build()) + .build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.builder().mainDataset(mainTable).stagingDataset(stagingTable).build(); + + // ------------ Perform Pass1 ------------------------ + String dataPass1 = basePathForInput + "source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass1.csv"; + // 1. Load Staging table + loadStagingDataForBitemporalFromOnly(dataPass1); + // 2. Execute Plan and Verify Results + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + + // ------------ Perform Pass2 ------------------------ + // 0. Create new filter + datasets = Datasets.of(mainTable, TestUtils.getBitemporalFromOnlyFilteredStagingTableIdBasedSecondPass()); + String dataPass2 = basePathForInput + "source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass2.csv"; + // 1. Load Staging table + loadStagingDataForBitemporalFromOnly(dataPass2); + // 2. 
Execute Plan and Verify Results + expectedStats = createExpectedStatsMap(8, 0, 6, 3, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + } + /* Scenario: Test milestoning Logic with only validity from time specified when staging table pre populated */ diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 4f09b145a66..d58eee0fe64 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -16,7 +16,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; @@ -104,8 +104,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoTaskId() throws Exce Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( 
H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList(filePath)).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -177,8 +177,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList(filePath)).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -195,7 +195,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .bulkLoadEventIdValue(TASK_ID_VALUE_1) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -250,8 +250,8 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList(filePath)).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -267,7 +267,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .bulkLoadEventIdValue(TASK_ID_VALUE_1) 
.executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -325,8 +325,8 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList(filePath)).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -342,7 +342,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .bulkLoadEventIdValue(TASK_ID_VALUE_1) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -399,8 +399,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledTwoBatches() throws E Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList(filePath)).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -499,7 +499,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) - .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .bulkLoadEventIdValue(TASK_ID_VALUE_1) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -533,8 +533,8 @@ public void testBulkLoadStageHasPrimaryKey() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( 
H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList("src/test/resources/data/bulk-load/input/staged_file1.csv")).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList("src/test/resources/data/bulk-load/input/staged_file1.csv")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, pkCol)).build()) .build(); @@ -546,7 +546,7 @@ public void testBulkLoadStageHasPrimaryKey() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) - .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .bulkLoadEventIdValue(TASK_ID_VALUE_1) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -580,8 +580,8 @@ public void testBulkLoadMainHasPrimaryKey() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList("src/test/resources/data/bulk-load/input/staged_file1.csv")).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Collections.singletonList("src/test/resources/data/bulk-load/input/staged_file1.csv")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -593,7 +593,7 @@ public void testBulkLoadMainHasPrimaryKey() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) - .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .bulkLoadEventIdValue(TASK_ID_VALUE_1) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -615,8 +615,8 @@ public void testBulkLoadMoreThanOneFile() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - 
.addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.csv", "src/test/resources/data/bulk-load/input/staged_file2.csv")).build()) + .fileFormat(FileFormatType.CSV) + .addAllFilePaths(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.csv", "src/test/resources/data/bulk-load/input/staged_file2.csv")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); Assertions.fail("Exception was not thrown"); @@ -635,8 +635,8 @@ public void testBulkLoadNotCsvFile() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.JSON) - .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.json")).build()) + .fileFormat(FileFormatType.JSON) + .addAllFilePaths(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.json")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); Assertions.fail("Exception was not thrown"); @@ -655,7 +655,7 @@ RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions o .executionTimestampClock(executionTimestampClock) .cleanupStagingData(options.cleanupStagingData()) .collectStatistics(options.collectStatistics()) - .bulkLoadTaskIdValue(taskId) + .bulkLoadEventIdValue(taskId) .enableConcurrentSafety(true) .caseConversion(caseConversion) .build(); @@ -668,14 +668,14 @@ private void verifyBulkLoadMetadata(Map appendMetadata, String f Assertions.assertEquals("main", appendMetadata.get("table_name")); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_start_ts_utc").toString()); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_end_ts_utc").toString()); - Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"files\":[\"%s\"]", fileName))); + 
Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"file_paths\":[\"%s\"]", fileName))); if (taskId.isPresent()) { - Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"task_id\":\"%s\"", taskId.get()))); + Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"event_id\":\"%s\"", taskId.get()))); } else { - Assertions.assertFalse(appendMetadata.get("batch_source_info").toString().contains("\"task_id\"")); + Assertions.assertFalse(appendMetadata.get("batch_source_info").toString().contains("\"event_id\"")); } } @@ -686,14 +686,14 @@ private void verifyBulkLoadMetadataForUpperCase(Map appendMetada Assertions.assertEquals("MAIN", appendMetadata.get("TABLE_NAME")); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("BATCH_START_TS_UTC").toString()); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("BATCH_END_TS_UTC").toString()); - Assertions.assertTrue(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains(String.format("\"files\":[\"%s\"]", fileName))); + Assertions.assertTrue(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains(String.format("\"file_paths\":[\"%s\"]", fileName))); if (taskId.isPresent()) { - Assertions.assertTrue(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains(String.format("\"task_id\":\"%s\"", taskId.get()))); + Assertions.assertTrue(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains(String.format("\"event_id\":\"%s\"", taskId.get()))); } else { - Assertions.assertFalse(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains("\"task_id\"")); + Assertions.assertFalse(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains("\"event_id\"")); } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 11d59c8ad52..893bc6c1c75 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -24,14 +24,21 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; +import org.finos.legend.engine.persistence.components.relational.CaseConversion; +import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; +import org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; import org.junit.jupiter.api.Assertions; @@ -46,6 +53,7 @@ import static org.finos.legend.engine.persistence.components.TestUtils.batchUpdateTimeName; import static org.finos.legend.engine.persistence.components.TestUtils.dataSplitName; import static org.finos.legend.engine.persistence.components.TestUtils.digestName; +import static org.finos.legend.engine.persistence.components.TestUtils.digestUDF; import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; import static org.finos.legend.engine.persistence.components.TestUtils.idName; import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; @@ -85,7 +93,7 @@ void testAppendOnlyVanillaUpperCase() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(NoAuditing.builder().build()) @@ -130,6 +138,59 @@ void testAppendOnlyVanillaUpperCase() 
throws Exception Assertions.assertEquals(stagingTableList.size(), 0); } + /* + Scenario: Test Append Only vanilla case + staging table is cleaned up in the end with upper case (2) + */ + @Test + void testAppendOnlyVanillaUpperCaseWithFilteredDataset() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + FilteredDataset stagingTable = TestUtils.getFilteredStagingTableWithComplexFilter(); + + // Create staging table + h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"EXPIRY_DATE\" DATE)"); + + // Generate the milestoning object + AppendOnly ingestMode = AppendOnly.builder() + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .filterExistingRecords(false) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{nameName.toUpperCase(), incomeName.toUpperCase(), expiryDateName.toUpperCase()}; + + // ------------ Perform incremental (append) milestoning With Clean Staging Table ------------------------ + String dataPass1 = basePath + "input/with_staging_filter/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/with_staging_filter/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithNoPkInUpperCase(dataPass1); + // 2. 
Execute plans and verify results + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 2); + expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); + expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + + // ------------ Perform incremental (append) milestoning With Clean Staging Table ------------------------ + String dataPass2 = basePath + "input/with_staging_filter/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/with_staging_filter/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithNoPkInUpperCase(dataPass2); + // 2. Execute plans and verify results + expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 2); + expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); + expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + } + /* Scenario: Test Append Only with auditing, no versioning, filter duplicates and filter existing records (1) */ @@ -144,7 +205,7 @@ void testAppendOnlyWithAuditingNoVersioningFilterDuplicatesFilterExistingRecords // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) @@ -189,7 +250,7 @@ void testAppendOnlyWithAuditingMaxVersionFilterDuplicatesFilterExistingRecordsUp // Generate the milestoning object AppendOnly 
ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) @@ -238,7 +299,7 @@ void testAppendOnlyWithAuditingMaxVersionFilterDuplicatesNoFilterExistingRecords // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(versionName) @@ -288,7 +349,7 @@ void testAppendOnlyWithAuditingAllVersionFilterDuplicatesFilterExistingRecords() // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder() .versioningField(versionName) @@ -345,7 +406,7 @@ void testAppendOnlyWithAuditingAllVersionFilterDuplicatesNoFilterExistingRecords // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder() .versioningField(versionName) @@ -388,6 +449,183 @@ void testAppendOnlyWithAuditingAllVersionFilterDuplicatesNoFilterExistingRecords executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, incrementalClock); } + /* + Scenario: Test Append Only vanilla case + staging table is cleaned up in the end with upper case with UDF based
digest generation + */ + @Test + void testAppendOnlyWithUDFDigestGeneration() throws Exception + { + // Register UDF + H2DigestUtil.registerMD5Udf(h2Sink, digestUDF); + + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNoPks(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + // Create staging table + h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"EXPIRY_DATE\" DATE)"); + + // Generate the milestoning object + AppendOnly ingestMode = AppendOnly.builder() + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .filterExistingRecords(false) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(digestUDF).digestField(digestName).build()) + .build(); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .caseConversion(CaseConversion.TO_UPPER) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST\".\"MAIN\"" + + "(\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"EXPIRY_DATE\" DATE,\"DIGEST\" VARCHAR)"; + + String expectedIngestSql = "INSERT INTO \"TEST\".\"MAIN\" " + + "(\"NAME\", \"INCOME\", \"EXPIRY_DATE\", \"DIGEST\") " + + "(SELECT staging.\"NAME\" as \"NAME\",staging.\"INCOME\" as \"INCOME\",staging.\"EXPIRY_DATE\" as \"EXPIRY_DATE\"," + + 
"LAKEHOUSE_MD5(ARRAY['NAME','INCOME','EXPIRY_DATE'],ARRAY[CONVERT(staging.\"NAME\",VARCHAR),CONVERT(staging.\"INCOME\",VARCHAR),CONVERT(staging.\"EXPIRY_DATE\",VARCHAR)]) " + + "FROM \"TEST\".\"STAGING\" as staging)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = new String[]{nameName.toUpperCase(), incomeName.toUpperCase(), expiryDateName.toUpperCase(), digestName.toUpperCase()}; + + // ------------ Perform incremental (append) milestoning With Clean Staging Table ------------------------ + String dataPass1 = basePath + "input/digest_generation/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/digest_generation/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithNoPkInUpperCase(dataPass1); + // 2. Execute plans and verify results + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); + expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + // 3. Assert that the staging table is truncated + List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"STAGING\""); + Assertions.assertEquals(stagingTableList.size(), 0); + + // ------------ Perform incremental (append) milestoning With Clean Staging Table ------------------------ + String dataPass2 = basePath + "input/digest_generation/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/digest_generation/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithNoPkInUpperCase(dataPass2); + // 2. 
Execute plans and verify results + expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStats.put(StatisticName.ROWS_UPDATED.name(), 0); + expectedStats.put(StatisticName.ROWS_TERMINATED.name(), 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + // 3. Assert that the staging table is truncated + stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"STAGING\""); + Assertions.assertEquals(stagingTableList.size(), 0); + } + + /* + Scenario: Test Append Only with auditing, all version, filter duplicates and no filter existing records with UDF based digest generation + */ + @Test + void testAppendOnlyWithUDFDigestGeneration2() throws Exception + { + // Register UDF + H2DigestUtil.registerMD5Udf(h2Sink, digestUDF); + + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getStagingTableWithNonPkVersionWithoutDigest(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + IncrementalClock incrementalClock = new IncrementalClock(fixedExecutionZonedDateTime1.toInstant(), ZoneOffset.UTC, 1000); + + // Create staging table + createStagingTableWithoutPks(stagingTable); + + // Generate the milestoning object + AppendOnly ingestMode = AppendOnly.builder() + .deduplicationStrategy(FilterDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .dataSplitFieldName(dataSplitName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) + .filterExistingRecords(false) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(digestUDF).digestField(digestName).build()) + .build(); + + // Verify SQLs using generator + 
RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST\".\"main\"" + + "(\"id\" INTEGER NOT NULL,\"name\" VARCHAR(64) NOT NULL,\"income\" BIGINT,\"start_time\" TIMESTAMP NOT NULL,\"expiry_date\" DATE,\"version\" INTEGER,\"batch_update_time\" TIMESTAMP NOT NULL,\"digest\" VARCHAR,PRIMARY KEY (\"id\", \"start_time\", \"batch_update_time\"))"; + + String expectedIngestSql = "INSERT INTO \"TEST\".\"main\" " + + "(\"id\", \"name\", \"income\", \"start_time\", \"expiry_date\", \"version\", \"digest\", \"batch_update_time\") " + + "(SELECT staging.\"id\" as \"id\",staging.\"name\" as \"name\",staging.\"income\" as \"income\",staging.\"start_time\" as \"start_time\",staging.\"expiry_date\" as \"expiry_date\",staging.\"version\" as \"version\"," + + "LAKEHOUSE_MD5(ARRAY['id','name','income','start_time','expiry_date','version'],ARRAY[CONVERT(staging.\"id\",VARCHAR),CONVERT(staging.\"name\",VARCHAR),CONVERT(staging.\"income\",VARCHAR),CONVERT(staging.\"start_time\",VARCHAR),CONVERT(staging.\"expiry_date\",VARCHAR),CONVERT(staging.\"version\",VARCHAR)])," + + "'2000-01-01 00:00:00.000000' " + + "FROM \"TEST\".\"staging_legend_persistence_temp_staging\" as staging WHERE (staging.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (staging.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = 
new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, versionName, digestName, batchUpdateTimeName}; + + // ------------ Perform incremental (append) milestoning Pass1 ------------------------ + String dataPass1 = basePath + "input/digest_generation2/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/digest_generation2/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithVersionWithoutDigest(dataPass1); + // 2. Execute plans and verify results + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = createExpectedStatsMap(3, 0, 3, 0, 0); + Map expectedStats2 = createExpectedStatsMap(1, 0, 1, 0, 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, incrementalClock); + + // ------------ Perform incremental (append) milestoning Pass2 ------------------------ + String dataPass2 = basePath + "input/digest_generation2/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/digest_generation2/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithVersionWithoutDigest(dataPass2); + // 2. 
Execute plans and verify results + expectedStatsList = new ArrayList<>(); + expectedStats1 = createExpectedStatsMap(4, 0, 3, 0, 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, incrementalClock); + } + /* Scenario: test Append Only with auditing, no version, allow duplicates and filter existing records when staging data is imported along with digest field population */ @@ -400,7 +638,7 @@ void testAppendOnlyWithStagingDataImportedWithPopulateDigest() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) @@ -439,7 +677,7 @@ void testAppendOnlyWithLessColumnsInStaging() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) @@ -480,7 +718,7 @@ void testAppendOnlyDoNotCreateTables() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(NoAuditing.builder().build()) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index b0dfc3d65a8..5eb28d99c2c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -32,9 +32,13 @@ import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.VersionColumnBasedResolver; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.NumericalValue; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; 
import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; import org.finos.legend.engine.persistence.components.versioning.TestDedupAndVersioning; @@ -45,7 +49,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.logging.Filter; import static org.finos.legend.engine.persistence.components.TestUtils.*; @@ -1007,4 +1010,79 @@ void testNonTemporalDeltaWithAllVersionDigestBasedAndStagingFilters() throws Exc executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); } + @Test + void testNonTemporalDeltaWithAllVersionDigestBasedAndFilteredDataset() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingDataset = DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersionAndBatch) + .build(); + + createStagingTableWithoutPks(stagingDataset); + FilteredDataset stagingTable = FilteredDataset.builder() + .group(testSchemaName) + .name(stagingTableName) + .schema(TestDedupAndVersioning.baseSchemaWithVersion) + .filter(Equals.of(FieldValue.builder() + .fieldName(batchName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(1L))) + .build(); + String path = "src/test/resources/data/incremental-delta-milestoning/input/with_staging_filter/with_all_version/digest_based/data1.csv"; + TestDedupAndVersioning.loadDataIntoStagingTableWithVersionAndBatch(path); + + // Generate the milestoning object + NontemporalDelta ingestMode = NontemporalDelta.builder() + .digestField(digestName) + .auditing(NoAuditing.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + .build()) + .build(); + + PlannerOptions options = 
PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName, nameName, versionName, incomeName, expiryDateName, digestName}; + + // ------------ Perform incremental (delta) milestoning Pass1 ------------------------ + String expectedDataPass1 = basePath + "expected/with_staging_filter/with_all_version/digest_based/expected_pass1.csv"; + // 2. Execute plans and verify results + List> expectedStatsList = new ArrayList<>(); + Map expectedStats1 = new HashMap<>(); + expectedStats1.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats1.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats1.put(StatisticName.ROWS_DELETED.name(), 0); + Map expectedStats2 = new HashMap<>(); + expectedStats2.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); + expectedStats2.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats2.put(StatisticName.ROWS_DELETED.name(), 0); + Map expectedStats3 = new HashMap<>(); + expectedStats3.put(StatisticName.INCOMING_RECORD_COUNT.name(), 1); + expectedStats3.put(StatisticName.ROWS_TERMINATED.name(), 0); + expectedStats3.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStatsList.add(expectedStats1); + expectedStatsList.add(expectedStats2); + expectedStatsList.add(expectedStats3); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass1, expectedStatsList, fixedClock_2000_01_01); + + // ------------ Perform incremental (delta) milestoning Pass2 Filter Duplicates ------------------------ + String expectedDataPass2 = basePath + "expected/with_staging_filter/with_all_version/digest_based/expected_pass2.csv"; + expectedStatsList = new ArrayList<>(); + Map expectedStats4 = new HashMap<>(); + expectedStats4.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats4.put(StatisticName.ROWS_TERMINATED.name(), 0); + 
expectedStats4.put(StatisticName.ROWS_DELETED.name(), 0); + expectedStatsList.add(expectedStats4); + stagingTable = stagingTable.withFilter(Equals.of(FieldValue.builder() + .fieldName(batchName) + .datasetRefAlias(stagingTableName) + .build(), NumericalValue.of(2L))); + datasets = Datasets.of(mainTable, stagingTable); + executePlansAndVerifyResultsWithDerivedDataSplits(ingestMode, options, datasets, schema, expectedDataPass2, expectedStatsList, fixedClock_2000_01_01); + } } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index 9e1ce6ca59b..61b05cc6966 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -26,6 +26,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.versioning.NoVersioningStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import 
org.finos.legend.engine.persistence.components.versioning.TestDedupAndVersioning; import org.junit.jupiter.api.Assertions; @@ -95,6 +96,48 @@ void testNontemporalSnapshotNoAuditing() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); } + /* + Scenario: Test Nontemporal Snapshot with no auditing + */ + @Test + void testNontemporalSnapshotNoAuditingWithFilteredDataset() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + FilteredDataset stagingTable = TestUtils.getFilteredStagingTable(); + + // Create staging table + DatasetDefinition stagingTableForDB = TestUtils.getStagingTableWithFilterForDB(); + createStagingTable(stagingTableForDB); + + // Generate the milestoning object + NontemporalSnapshot ingestMode = NontemporalSnapshot.builder().auditing(NoAuditing.builder().build()).build(); + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName}; + + // ------------ Perform snapshot milestoning Pass1 ------------------------ + String dataPass1 = basePath + "input/with_staging_filter/data_pass1.csv"; + String expectedDataPass1 = basePath + "expected/with_staging_filter/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithFilter(dataPass1); + // 2. Execute plans and verify results + Map expectedStats = createExpectedStatsMap(5, 0, 5, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats); + + // ------------ Perform snapshot milestoning Pass2 ------------------------ + // 0. 
Create new filter + datasets = Datasets.of(mainTable, TestUtils.getFilteredStagingTableSecondPass()); + String dataPass2 = basePath + "input/with_staging_filter/data_pass2.csv"; + String expectedDataPass2 = basePath + "expected/with_staging_filter/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithFilter(dataPass2); + // 2. Execute plans and verify results + expectedStats.clear(); + expectedStats = createExpectedStatsMap(3, 5, 3, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); + } + /* Scenario: Test Nontemporal Snapshot when auditing is enabled */ @@ -352,6 +395,4 @@ void testNontemporalSnapshotWithFailOnDupsNoVersioning() throws Exception Assertions.assertEquals("Encountered Duplicates, Failing the batch as Fail on Duplicates is set as Deduplication strategy", e.getMessage()); } } - - } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java index c5a6709584f..01adb8d1f12 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaTest.java @@ -30,6 +30,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.junit.jupiter.api.Assertions; @@ -886,6 +887,65 @@ void testMilestoningWithFilterStagingTableWithMaxVersioningGreaterThanWithDedupW executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); } + @Test + void testMilestoningWithMaxVersioningGreaterThanWithDedupWithFilteredDatasetWithUpperCase() throws Exception + { + DatasetDefinition mainTable = TestUtils.getUnitemporalMainTableWithVersion(); + FilteredDataset stagingTable = TestUtils.getFilteredStagingTableWithVersion(); + + String[] schema = new String[]{idName.toUpperCase(), nameName.toUpperCase(), incomeName.toUpperCase(), startTimeName.toUpperCase(), expiryDateName.toUpperCase(), digestName.toUpperCase(), versionName.toUpperCase(), batchIdInName.toUpperCase(), batchIdOutName.toUpperCase(), batchTimeInName.toUpperCase(), batchTimeOutName.toUpperCase()}; + + // Create staging table + h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"ID\" INTEGER NOT NULL,\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"START_TIME\" TIMESTAMP NOT NULL,\"EXPIRY_DATE\" DATE,\"DIGEST\" VARCHAR,\"VERSION\" INT,\"BATCH\" INT,PRIMARY KEY (\"ID\", \"START_TIME\", \"VERSION\", \"BATCH\"))"); + + UnitemporalDelta ingestMode = UnitemporalDelta.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + 
.versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)) + .performStageVersioning(true) + .build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + // ------------ Perform Pass1 ------------------------ + String dataPass1 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than/with_dedup/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than/with_dedup/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataWithFilterWithVersionInUpperCase(dataPass1); + // 2. Execute plans and verify results + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform Pass2 ------------------------ + // 0. Create new filter + datasets = Datasets.of(mainTable, TestUtils.getFilteredStagingTableWithVersionSecondPass()); + String dataPass2 = basePathForInput + "with_staging_filter/with_max_versioning/greater_than/with_dedup/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than/with_dedup/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataWithFilterWithVersionInUpperCase(dataPass2); + // 2. 
Execute plans and verify results + expectedStats = createExpectedStatsMap(9, 0, 1, 1, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform Pass3 empty batch (No Impact) ------------------------- + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "with_staging_filter/with_max_versioning/greater_than/with_dedup/expected_pass3.csv"; + // 1. Load staging table + loadStagingDataWithFilterWithVersionInUpperCase(dataPass3); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); + } + @Test void testMilestoningWithMaxVersioningFail() throws Exception { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java index 8ab15d09f9f..f4642b4039a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java @@ -26,6 +26,7 @@ import 
org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.junit.jupiter.api.Assertions; @@ -526,6 +527,70 @@ void testUnitemporalSnapshotMilestoningLogicMaxVersionWithPartitionFilterDuplica executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); } + /* + Scenario: Test milestoning Logic with max version and with Partition when staging table pre populated + */ + @Test + void testUnitemporalSnapshotMilestoningLogicMaxVersionWithPartitionFilterDuplicatesWithFilteredDataset() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + FilteredDataset stagingTable = TestUtils.getEntityPriceWithVersionFilteredStagingTable(); + + String[] schema = new String[]{dateName, entityName, priceName, volumeName, digestName, versionName, batchIdInName, batchIdOutName, batchTimeInName, batchTimeOutName}; + + // Create staging table + createStagingTableWithoutPks(TestUtils.getEntityPriceWithVersionStagingTable()); + + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .addAllPartitionFields(Collections.singletonList(dateName)) + .versioningStrategy(MaxVersionStrategy.builder() + .versioningField(versionName) + .mergeDataVersionResolver(DigestBasedResolver.builder().build()) + 
.performStageVersioning(true) + .build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).cleanupStagingData(false).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + // ------------ Perform unitemporal snapshot milestoning Pass1 ------------------------ + String dataPass1 = basePathForInput + "with_staging_filter/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "with_staging_filter/expected_pass1.csv"; + // 1. Load staging table + loadStagingDataForWithPartitionWithVersion(dataPass1); + // 2. Execute plans and verify results + Map expectedStats = createExpectedStatsMap(6, 0, 4, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ + // 0. Create new filter + datasets = Datasets.of(mainTable, TestUtils.getEntityPriceWithVersionFilteredStagingTableSecondPass()); + String dataPass2 = basePathForInput + "with_staging_filter/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "with_staging_filter/expected_pass2.csv"; + // 1. Load staging table + loadStagingDataForWithPartitionWithVersion(dataPass2); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(1, 0, 1, 0, 2); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ + String dataPass3 = "src/test/resources/data/empty_file.csv"; + String expectedDataPass3 = basePathForExpected + "with_staging_filter/expected_pass3.csv"; + // 1. Load Staging table + loadStagingDataForWithPartitionWithVersion(dataPass3); + // 2. 
Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); + executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + } + /* Scenario: Test milestoning Logic with max version and with Partition when staging table pre populated with upper case */ diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java index b59eb9776c2..f350510e5e1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/SchemaEvolutionTest.java @@ -17,11 +17,10 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; -import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import 
org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; @@ -31,7 +30,6 @@ import org.junit.jupiter.api.Test; import java.util.Arrays; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -57,7 +55,7 @@ void testAddColumn() throws Exception createTempTable(mainTable); AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); @@ -109,7 +107,7 @@ void testDataTypeConversion() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); @@ -162,7 +160,7 @@ void testDataTypeSizeChange() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); @@ -217,7 +215,7 @@ void testColumnNullabilityChange() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + 
.digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); @@ -270,7 +268,7 @@ void testDataTypeConversionAndColumnNullabilityChange() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); @@ -325,7 +323,7 @@ void testDataTypeConversionAndDataTypeSizeChange() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); @@ -380,7 +378,7 @@ void testMakeMainColumnNullable() throws Exception createTempTable(mainTable); AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); @@ -433,7 +431,7 @@ void testSchemaEvolutionFailPKTypeDifferent() throws Exception // Generate the milestoning object AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestName) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestName).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeName).build()) .build(); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java index 984e5ab7277..30e7c601748 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/versioning/TestDedupAndVersioning.java @@ -26,6 +26,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; @@ -188,7 +189,7 @@ void testNoDedupAllVersioningDoNotPerform() Datasets datasets = Datasets.of(mainTable, stagingTable); IngestMode ingestMode = AppendOnly.builder() .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) - .digestField("digest") + 
.digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder().versioningField("version").performStageVersioning(false).mergeDataVersionResolver(DigestBasedResolver.INSTANCE).build()) .build(); @@ -207,7 +208,7 @@ void testNoDedupAllVersion() throws Exception Datasets datasets = Datasets.of(mainTable, stagingTable); IngestMode ingestMode = AppendOnly.builder() .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) - .digestField("digest") + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(true).build()) @@ -323,7 +324,7 @@ void testFilterDupsAllVersionDoNotPerform() throws Exception Datasets datasets = Datasets.of(mainTable, stagingTable); IngestMode ingestMode = AppendOnly.builder() .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) - .digestField("digest") + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(false).build()) @@ -348,7 +349,7 @@ void testFilterDupsAllVersion() throws Exception Datasets datasets = Datasets.of(mainTable, stagingTable); IngestMode ingestMode = AppendOnly.builder() .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) - .digestField("digest") + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) .deduplicationStrategy(FilterDuplicates.builder().build()) 
.versioningStrategy(AllVersionsStrategy.builder().versioningField("version") .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(true).build()) @@ -489,7 +490,7 @@ void testFailOnDupsAllVersionDoNotPerform() throws Exception Datasets datasets = Datasets.of(mainTable, stagingTable); IngestMode ingestMode = AppendOnly.builder() .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) - .digestField("digest") + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) .deduplicationStrategy(FailOnDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(false).build()) @@ -529,7 +530,7 @@ void testFailOnDupsAllVersion() throws Exception Datasets datasets = Datasets.of(mainTable, stagingTable); IngestMode ingestMode = AppendOnly.builder() .auditing(DateTimeAuditing.builder().dateTimeField("append_time").build()) - .digestField("digest") + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField("digest").build()) .deduplicationStrategy(FailOnDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder().versioningField("version") .mergeDataVersionResolver(DigestBasedResolver.INSTANCE).performStageVersioning(true).build()) @@ -659,6 +660,4 @@ private void verifyResults(String expectedDataPath, String [] schema) throws IOE List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", tempStagingTableName)); TestUtils.assertFileAndTableDataEquals(schema, expectedDataPath, tableData); } - - } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass1.csv new file mode 100644 index 00000000000..203200dfdd5 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass1.csv @@ -0,0 +1,3 @@ +1,1,DIGEST1,2022-01-02 00:00:00.0,2022-01-05 00:00:00.0,1,999999999 +1,2,DIGEST2,2022-01-05 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 +2,3,DIGEST3,2022-01-02 00:00:00.0,9999-12-31 23:59:59.0,1,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass2.csv new file mode 100644 index 00000000000..ca1c9be0742 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/expected/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/expected_pass2.csv @@ -0,0 +1,12 @@ +1,1,DIGEST1,2022-01-02 00:00:00.0,2022-01-05 00:00:00.0,1,1 +1,2,DIGEST2,2022-01-05 00:00:00.0,9999-12-31 23:59:59.0,1,1 
+2,3,DIGEST3,2022-01-02 00:00:00.0,9999-12-31 23:59:59.0,1,1 +1,4,DIGEST4,2022-01-01 00:00:00.0,2022-01-02 00:00:00.0,2,999999999 +1,5,DIGEST5,2022-01-03 00:00:00.0,2022-01-04 00:00:00.0,2,999999999 +1,6,DIGEST6,2022-01-04 00:00:00.0,2022-01-05 00:00:00.0,2,999999999 +1,11,DIGEST11,2022-01-05 00:00:00.0,2022-01-07 00:00:00.0,2,999999999 +1,7,DIGEST7,2022-01-07 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 +2,8,DIGEST8,2022-01-02 00:00:00.0,2022-01-05 00:00:00.0,2,999999999 +2,10,DIGEST10,2022-01-05 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 +3,9,DIGEST9,2022-01-07 00:00:00.0,9999-12-31 23:59:59.0,2,999999999 +1,1,DIGEST1,2022-01-02 00:00:00.0,2022-01-03 00:00:00.0,2,999999999 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass1.csv new file mode 100644 index 00000000000..27a901a4005 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass1.csv @@ -0,0 +1,5 @@ +1,2022-01-02 00:00:00.0,1,DIGEST1 +1,2022-01-05 00:00:00.0,2,DIGEST2 +2,2022-01-02 00:00:00.0,3,DIGEST3 +2,2022-01-05 00:00:00.0,4,DIGEST4 +3,2022-01-02 00:00:00.0,5,DIGEST5 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass2.csv new file mode 100644 index 00000000000..365aef453f6 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bitemporal-incremental-milestoning/input/batch_id_based/source_specifies_from/without_delete_ind/set_6_with_staging_filter/staging_data_pass2.csv @@ -0,0 +1,10 @@ +1,2022-01-01 00:00:00.0,4,DIGEST4 +1,2022-01-02 00:00:00.0,100,DIGEST12 +1,2022-01-03 00:00:00.0,5,DIGEST5 +1,2022-01-04 00:00:00.0,6,DIGEST6 +1,2022-01-05 00:00:00.0,11,DIGEST11 +1,2022-01-07 00:00:00.0,7,DIGEST7 +2,2022-01-02 00:00:00.0,8,DIGEST8 +2,2022-01-05 00:00:00.0,10,DIGEST10 +2,2022-01-06 00:00:00.0,21,DIGEST13 +3,2022-01-07 00:00:00.0,9,DIGEST9 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass1.csv new file mode 100644 index 00000000000..fb8bd28722f --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass1.csv @@ -0,0 +1,3 @@ +HARRY,1000,2022-12-01,f7cd27572192ed441b479526e4741677 +ROBERT,2000,2022-12-02,c1e270e47dd44183847e26022491a0f4 +ANDY,3000,2022-12-03,ddc893bc350aba9292db1e1ea5dff172 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass2.csv new file mode 100644 index 00000000000..4774adb615c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation/expected_pass2.csv @@ -0,0 +1,6 @@ +HARRY,1000,2022-12-01,f7cd27572192ed441b479526e4741677 +ROBERT,2000,2022-12-02,c1e270e47dd44183847e26022491a0f4 +ANDY,3000,2022-12-03,ddc893bc350aba9292db1e1ea5dff172 +ROBERT,2000,2022-12-02,c1e270e47dd44183847e26022491a0f4 +ANDY,3100,2022-12-03,508b8dba2c48cf5ea84f8133ebb12d90 +MATT,4000,2022-12-06,ea65523c56a461d57df1ca1dd476dc89 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass1.csv new file mode 100644 index 00000000000..e3ff7537069 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass1.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,1,dd94f3aa99f1a1160323db30bc81824d,2000-01-01 00:00:02.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,1,412f2c7dd39d0e8f2a84d13633af1202,2000-01-01 00:00:02.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,1,6513c7a9913473b2f0b03013575d1751,2000-01-01 00:00:02.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,2,a6d9a840e1f527f844c694e41dedf2c7,2000-01-01 00:00:03.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass2.csv new file mode 100644 index 00000000000..68bd1bb3205 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/digest_generation2/expected_pass2.csv @@ -0,0 +1,7 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,1,dd94f3aa99f1a1160323db30bc81824d,2000-01-01 00:00:02.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,1,412f2c7dd39d0e8f2a84d13633af1202,2000-01-01 00:00:02.0 +3,ANDY,3000,2020-01-03 
00:00:00.0,2022-12-03,1,6513c7a9913473b2f0b03013575d1751,2000-01-01 00:00:02.0 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,2,a6d9a840e1f527f844c694e41dedf2c7,2000-01-01 00:00:03.0 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,1,dd94f3aa99f1a1160323db30bc81824d,2000-01-01 00:00:06.0 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,2,5f0f3f716a79862c3039f3bea3f4ad9a,2000-01-01 00:00:06.0 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,1,032e6539befc2378772bca93a2f8d591,2000-01-01 00:00:06.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass1.csv new file mode 100644 index 00000000000..95012897702 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass1.csv @@ -0,0 +1,2 @@ +ANDY,3000,2022-12-03 +MINDY,4000,2022-12-04 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass2.csv new file mode 100644 index 00000000000..bd89a58f93e --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/expected/with_staging_filter/expected_pass2.csv @@ -0,0 +1,4 @@ +ANDY,3000,2022-12-03 +MINDY,4000,2022-12-04 +ROBERT,2000,2022-12-05 +MATT,4000,2022-12-05 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass1.csv new file mode 100644 index 00000000000..179b54e57d8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass1.csv @@ -0,0 +1,3 @@ +HARRY,1000,2022-12-01 +ROBERT,2000,2022-12-02 +ANDY,3000,2022-12-03 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass2.csv new file mode 100644 index 00000000000..7bf2b920ce4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation/data_pass2.csv @@ -0,0 +1,3 @@ +ROBERT,2000,2022-12-02 +ANDY,3100,2022-12-03 
+MATT,4000,2022-12-06 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass1.csv new file mode 100644 index 00000000000..2694c70a227 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass1.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,1 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,1 +3,ANDY,4000,2020-01-03 00:00:00.0,2022-12-03,2 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass2.csv new file mode 100644 index 00000000000..38d09bb021a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/digest_generation2/data_pass2.csv @@ -0,0 +1,4 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,1 +2,ROBERT,2001,2020-01-02 00:00:00.0,2022-12-02,2 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,1 +4,SANDY,4000,2020-01-04 00:00:00.0,2022-12-04,1 diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass1.csv new file mode 100644 index 00000000000..bd64f3e7b12 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass1.csv @@ -0,0 +1,4 @@ +HARRY,1000,2022-12-01 +ROBERT,2000,2022-12-02 +ANDY,3000,2022-12-03 +MINDY,4000,2022-12-04 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass2.csv new file mode 100644 index 00000000000..df1fb53595a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/incremental-append-milestoning/input/with_staging_filter/data_pass2.csv @@ -0,0 +1,3 @@ +ROBERT,2000,2022-12-05 +ANDY,999,2022-12-03 +MATT,4000,2022-12-05 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass1.csv new file mode 100644 index 00000000000..f51b12ce482 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass1.csv @@ -0,0 +1,5 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 +4,MICHEL,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4 +5,LIZA,5000,2020-01-05 00:00:00.0,2022-12-05,DIGEST5 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass2.csv new file mode 100644 index 00000000000..bf3890397c4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/expected/with_staging_filter/expected_pass2.csv @@ -0,0 +1,3 @@ +4,MICHEL,4001,2020-01-04 00:00:00.0,2022-12-04,DIGEST4_UPDATED +5,LIZA,5001,2020-01-05 00:00:00.0,2022-12-05,DIGEST5_UPDATED +6,MINDY,6001,2020-01-06 00:00:00.0,2022-12-06,DIGEST6 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass1.csv new file mode 100644 index 00000000000..4d5f3874a63 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass1.csv @@ -0,0 +1,8 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1 +4,MICHEL,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4,1 +5,LIZA,5000,2020-01-05 00:00:00.0,2022-12-05,DIGEST5,1 +4,MICHEL,4001,2020-01-04 00:00:00.0,2022-12-04,DIGEST4_UPDATED,2 +5,LIZA,5001,2020-01-05 00:00:00.0,2022-12-05,DIGEST5_UPDATED,2 +6,MINDY,6001,2020-01-06 00:00:00.0,2022-12-06,DIGEST6,2 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass2.csv new file mode 100644 index 00000000000..4d58ebb9356 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/snapshot-milestoning/input/with_staging_filter/data_pass2.csv @@ -0,0 +1,8 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1 +4,MICHEL,4000,2020-01-04 00:00:00.0,2022-12-04,DIGEST4,1 +5,LIZA,5000,2020-01-05 00:00:00.0,2022-12-05,DIGEST5,1 
+4,MICHEL,4001,2020-01-04 00:00:00.0,2022-12-04,DIGEST4_UPDATED,2 +5,LIZA,5001,2020-01-05 00:00:00.0,2022-12-05,DIGEST5_UPDATED,2 +6,MINDY,6001,2020-01-06 00:00:00.0,2022-12-06,DIGEST6,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass1.csv new file mode 100644 index 00000000000..b7c23ba34bd --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass1.csv @@ -0,0 +1,4 @@ +2021-12-01,GS,383.82,300,DIGEST3_UPDATED2,3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,100,DIGEST2,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,37800.00,999,DIGEST6,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,JPMX,159.83,200,DIGEST5_UPDATED,2,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass2.csv new file mode 100644 
index 00000000000..71d5fc848e3 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass2.csv @@ -0,0 +1,5 @@ +2021-12-01,GS,383.82,300,DIGEST3_UPDATED2,3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,100,DIGEST2,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,37800.00,999,DIGEST6,1,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,JPMX,159.83,200,DIGEST5_UPDATED,2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,JPM,159.83,1000,DIGEST7,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass3.csv new file mode 100644 index 00000000000..71d5fc848e3 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_staging_filter/expected_pass3.csv @@ -0,0 +1,5 @@ +2021-12-01,GS,383.82,300,DIGEST3_UPDATED2,3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,100,DIGEST2,1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,37800.00,999,DIGEST6,1,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,JPMX,159.83,200,DIGEST5_UPDATED,2,1,1,2000-01-01 
00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,JPM,159.83,1000,DIGEST7,1,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass1.csv new file mode 100644 index 00000000000..699aa27b114 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass1.csv @@ -0,0 +1,9 @@ +2021-12-01,IBM,116.92,10,DIGEST1,1 +2021-12-01,JPM,161.00,100,DIGEST2,1 +2021-12-01,GS,383.82,10,DIGEST3,1 +2021-12-01,GS,383.82,200,DIGEST3_UPDATED1,2 +2021-12-01,GS,383.82,300,DIGEST3_UPDATED2,3 +2021-12-02,IBM,117.37,10,DIGEST4,1 +2021-12-02,JPMX,159.83,100,DIGEST5,1 +2021-12-02,JPMX,159.83,200,DIGEST5_UPDATED,2 +2021-12-02,GS,37800.00,999,DIGEST6,1 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass2.csv new file mode 100644 index 00000000000..2a3a853b910 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/input/batch_id_and_time_based/with_staging_filter/staging_data_pass2.csv @@ -0,0 +1,4 @@ +2021-12-02,IBM,117.37,10,DIGEST4,1 +2021-12-02,JPM,159.83,1000,DIGEST7,1 +2021-12-02,GS,378.00,0,DIGEST8,2 +2021-12-02,GS,378.00,0,DIGEST8,2 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/optimizer/StringCaseOptimizer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/optimizer/StringCaseOptimizer.java index 960f19296f5..a728db56743 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/optimizer/StringCaseOptimizer.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/optimizer/StringCaseOptimizer.java @@ -46,8 +46,8 @@ public PhysicalPlanNode optimize(PhysicalPlanNode component) else if (component instanceof ShowCommand) { ShowCommand command = (ShowCommand) component; - command.setSchemaName(applyCase(command.getSchemaName())); - command.setDatabaseName(applyCase(command.getDatabaseName())); + command.setSchemaName(applyCase(command.getSchemaName().orElse(null))); + command.setDatabaseName(applyCase(command.getDatabaseName().orElse(null))); command.setTableName(applyCase(command.getTableName())); return command; } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SQLCreateVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SQLCreateVisitor.java index cbeaa125245..6e07ff5b435 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SQLCreateVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SQLCreateVisitor.java @@ -35,7 +35,8 @@ public class SQLCreateVisitor implements LogicalPlanVisitor public VisitorResult visit(PhysicalPlanNode prev, Create current, VisitorContext context) { CreateTable createTable = new CreateTable(); - if (!current.dataset().schema().shardSpecification().isPresent() || current.dataset().schema().shardSpecification().get().shardKeys().size() == 0) + // if the table is not sharded, it will be created as REFERENCE TABLE in memsql + if (current.dataset().schema().shardSpecification().isPresent() && !current.dataset().schema().shardSpecification().get().isSharded()) { List tableTypeList = Arrays.asList(new ReferenceTableType()); createTable = new CreateTable(tableTypeList); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SchemaDefinitionVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SchemaDefinitionVisitor.java index 340552b8160..d3f83511727 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SchemaDefinitionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/SchemaDefinitionVisitor.java @@ -85,18 +85,18 @@ public VisitorResult visit(PhysicalPlanNode prev, SchemaDefinition current, Visi prev.push(constraint); } - boolean isShard = current.shardSpecification().isPresent() && current.shardSpecification().get().shardKeys().size() > 0; + boolean hasShardKeys = current.shardSpecification().isPresent() && current.shardSpecification().get().shardKeys().size() > 0; // if table is sharded and primary keys are present if (isTableColumnStore) { - if (pkNum >= 1 && isShard) + if (pkNum >= 1 && hasShardKeys) { TableConstraint constraint = new UnenforcedUniqueIndexConstraint(pkFields.stream().map(Field::name).collect(Collectors.toList()), context.quoteIdentifier()); prev.push(constraint); } } - if (isShard) + if (hasShardKeys) { TableConstraint constraint = new ShardKeyConstraint(current.shardSpecification().get().shardKeys().stream().map(Field::name).collect(Collectors.toList()), context.quoteIdentifier()); prev.push(constraint); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/ShowVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/ShowVisitor.java index 473a0bd7126..3446298dd89 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/ShowVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sql/visitor/ShowVisitor.java @@ -28,35 +28,18 @@ public class ShowVisitor implements LogicalPlanVisitor @Override public VisitorResult visit(PhysicalPlanNode prev, Show current, VisitorContext context) { - ShowCommand command; - if (current.dataset().datasetReference().database().isPresent() && current.dataset().datasetReference().group().isPresent()) - { - command = new ShowCommand( - ShowType.valueOf(current.operation().name()), - current.dataset().datasetReference().database().get(), - current.dataset().datasetReference().group().get(), - current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new)); - } - else if (current.dataset().datasetReference().group().isPresent()) - { - command = new ShowCommand( - ShowType.valueOf(current.operation().name()), - null, - current.dataset().datasetReference().group().get(), - current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new)); - } - else - { - command = new ShowCommand( - ShowType.valueOf(current.operation().name()), - null, - null, - current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new)); - } + ShowCommand command = new ShowCommand( + ShowType.valueOf(current.operation().name()), + 
current.dataset().datasetReference().database(), + current.dataset().datasetReference().group(), + current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new), + context.quoteIdentifier()); + for (Optimizer optimizer : context.optimizers()) { command = (ShowCommand) optimizer.optimize(command); } + prev.push(command); return new VisitorResult(null); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sqldom/schemaops/statements/ShowCommand.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sqldom/schemaops/statements/ShowCommand.java index 9e3f603e01d..0fabe5d2aba 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sqldom/schemaops/statements/ShowCommand.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/sqldom/schemaops/statements/ShowCommand.java @@ -20,41 +20,45 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.common.ShowType; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; +import java.util.Optional; + import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; public class ShowCommand implements SqlGen { private final ShowType operation; - private String databaseName; - private String schemaName; + private Optional databaseName; + private Optional schemaName; private 
String tableName; + private final String quoteIdentifier; - public ShowCommand(ShowType operation, String databaseName, String schemaName, String tableName) + public ShowCommand(ShowType operation, Optional databaseName, Optional schemaName, String tableName, String quoteIdentifier) { this.operation = operation; this.databaseName = databaseName; this.schemaName = schemaName; this.tableName = tableName; + this.quoteIdentifier = quoteIdentifier; } - public String getDatabaseName() + public Optional getDatabaseName() { return databaseName; } public void setDatabaseName(String databaseName) { - this.databaseName = databaseName; + this.databaseName = Optional.of(databaseName); } - public String getSchemaName() + public Optional getSchemaName() { return schemaName; } public void setSchemaName(String schemaName) { - this.schemaName = schemaName; + this.schemaName = Optional.of(schemaName); } public String getTableName() @@ -75,7 +79,7 @@ public void setTableName(String tableName) SHOW [FULL] [TEMPORARY] TABLES [{FROM | IN} db_name] - [[EXTENDED] LIKE pattern | WHERE TABLE_TYPE {= | !=} {'VIEW' | 'BASE TABLE'}] + [[EXTENDED] LIKE 'pattern' | WHERE TABLE_TYPE {= | !=} {'VIEW' | 'BASE TABLE'}] SHOW SCHEMAS [LIKE 'pattern'] @@ -91,33 +95,33 @@ public void genSql(StringBuilder builder) throws SqlDomException if (operation == ShowType.COLUMNS) { builder.append(WHITE_SPACE + Clause.FROM.get()); - builder.append(WHITE_SPACE + tableName); - if ((databaseName != null && !databaseName.isEmpty()) && (schemaName != null && !schemaName.isEmpty())) + builder.append(WHITE_SPACE + SqlGenUtils.getQuotedField(tableName, quoteIdentifier)); + if ((databaseName.isPresent() && !databaseName.get().isEmpty()) && (schemaName.isPresent() && !schemaName.get().isEmpty())) { builder.append(WHITE_SPACE); builder.append(Clause.IN.get() + WHITE_SPACE); - builder.append(databaseName + SqlGenUtils.DOT + schemaName); + builder.append(SqlGenUtils.getQuotedField(databaseName.get(), quoteIdentifier) + 
SqlGenUtils.DOT + SqlGenUtils.getQuotedField(schemaName.get(), quoteIdentifier)); } - else if (schemaName != null && !schemaName.isEmpty()) + else if (schemaName.isPresent() && !schemaName.get().isEmpty()) { builder.append(WHITE_SPACE); builder.append(Clause.IN.get() + WHITE_SPACE); - builder.append(schemaName); + builder.append(SqlGenUtils.getQuotedField(schemaName.get(), quoteIdentifier)); } } else if (operation == ShowType.TABLES) { - if ((databaseName != null && !databaseName.isEmpty()) && (schemaName != null && !schemaName.isEmpty())) + if ((databaseName.isPresent() && !databaseName.get().isEmpty()) && (schemaName.isPresent() && !schemaName.get().isEmpty())) { builder.append(WHITE_SPACE); builder.append(Clause.FROM.get() + WHITE_SPACE); - builder.append(databaseName + SqlGenUtils.DOT + schemaName); + builder.append(SqlGenUtils.getQuotedField(databaseName.get(), quoteIdentifier) + SqlGenUtils.DOT + SqlGenUtils.getQuotedField(schemaName.get(), quoteIdentifier)); } - else if (schemaName != null && !schemaName.isEmpty()) + else if (schemaName.isPresent() && !schemaName.get().isEmpty()) { builder.append(WHITE_SPACE); builder.append(Clause.FROM.get() + WHITE_SPACE); - builder.append(schemaName); + builder.append(SqlGenUtils.getQuotedField(schemaName.get(), quoteIdentifier)); } builder.append(WHITE_SPACE + Clause.LIKE.get()); builder.append(WHITE_SPACE); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index 20c714fa2a7..2ca43639eba 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -20,6 +20,7 @@ import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.memsql.MemSqlSink; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.List; @@ -250,4 +251,28 @@ public void verifyAppendOnlyWithAuditingFilterDupsMaxVersionNoFilterExistingReco Assertions.assertEquals(rowsInserted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); } + + @Override + @Test + public void testAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration() + { + // Digest UDF Generation not available for Memsql sink + } + + @Override + public void verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(GeneratorResult operations) + { + } + + @Override + @Test + public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration() + { + // Digest UDF Generation not available for Memsql sink + } + + @Override + public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(List generatorResults, List dataSplitRanges) + { + } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromTest.java index 9de07694b8c..c046af228c5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BitemporalDeltaSourceSpecifiesFromTest.java @@ -310,10 +310,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator @Override public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { - String tempName = operations.get(0).preActionsSql().get(2).split("CREATE REFERENCE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; - String tempWithDeleteIndicatorName = operations.get(0).preActionsSql().get(3).split("CREATE REFERENCE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; + String tempName = operations.get(0).preActionsSql().get(2).split("CREATE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; + String tempWithDeleteIndicatorName = operations.get(0).preActionsSql().get(3).split("CREATE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; - String expectedBitemporalFromOnlyDefaultTempTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS " + tempName + + String expectedBitemporalFromOnlyDefaultTempTableCreateQuery = "CREATE TABLE IF NOT EXISTS " + 
tempName + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -325,7 +325,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List operations, List dataSplitRanges) { - String tempName = operations.get(0).preActionsSql().get(2).split("CREATE REFERENCE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; - String tempWithDeleteIndicatorName = operations.get(0).preActionsSql().get(3).split("CREATE REFERENCE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; - String stageWithoutDuplicatesName = operations.get(0).preActionsSql().get(4).split("CREATE REFERENCE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; + String tempName = operations.get(0).preActionsSql().get(2).split("CREATE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; + String tempWithDeleteIndicatorName = operations.get(0).preActionsSql().get(3).split("CREATE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; + String stageWithoutDuplicatesName = operations.get(0).preActionsSql().get(4).split("CREATE TABLE IF NOT EXISTS ")[1].split("\\(")[0]; - String expectedBitemporalFromOnlyDefaultTempTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS " + tempName + + String expectedBitemporalFromOnlyDefaultTempTableCreateQuery = "CREATE TABLE IF NOT EXISTS " + tempName + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -782,7 +782,7 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - String expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS " + tempWithDeleteIndicatorName + + String expectedBitemporalFromOnlyDefaultTempTableWithDeleteIndicatorCreateQuery = "CREATE TABLE IF NOT EXISTS " + tempWithDeleteIndicatorName + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -795,7 +795,7 @@ public void 
verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu "`delete_indicator` TINYINT(1)," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - String expectedBitemporalFromOnlyStageWithDeleteIndicatorWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS " + stageWithoutDuplicatesName + + String expectedBitemporalFromOnlyStageWithDeleteIndicatorWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE TABLE IF NOT EXISTS " + stageWithoutDuplicatesName + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java index 12cb43e9b10..dbbb6f2a5e1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/MemsqlTestArtifacts.java @@ -16,28 +16,28 @@ public class MemsqlTestArtifacts { - public static String expectedBaseTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedBaseTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`biz_date` DATE," + "PRIMARY KEY (`id`, `name`))"; - public static String 
expectedStagingTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging`(" + + public static String expectedStagingTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`staging`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`biz_date` DATE," + "PRIMARY KEY (`id`, `name`))"; - public static String expectedBaseTableCreateQueryWithUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + + public static String expectedBaseTableCreateQueryWithUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + "(`ID` INTEGER NOT NULL," + "`NAME` VARCHAR(256) NOT NULL," + "`AMOUNT` DOUBLE," + "`BIZ_DATE` DATE," + "PRIMARY KEY (`ID`, `NAME`))"; - public static String expectedBaseTablePlusDigestCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedBaseTablePlusDigestCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -45,7 +45,7 @@ public class MemsqlTestArtifacts "`digest` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`))"; - public static String expectedStagingTableWithDigestCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging`(" + + public static String expectedStagingTableWithDigestCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`staging`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -53,49 +53,58 @@ public class MemsqlTestArtifacts "`digest` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`))"; - public static String expectedBaseTablePlusDigestPlusVersionCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + - "`id` INTEGER NOT NULL," + - "`name` VARCHAR(256) NOT NULL," + - "`amount` DOUBLE," + - "`biz_date` DATE," + - "`digest` VARCHAR(256)," + - "`version` INTEGER," + - "PRIMARY KEY (`id`, `name`))"; + public static String expectedBaseTablePlusDigestPlusVersionCreateQuery = "CREATE TABLE IF NOT EXISTS 
`mydb`.`main`(" + + "`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`biz_date` DATE," + + "`digest` VARCHAR(256)," + + "`version` INTEGER," + + "PRIMARY KEY (`id`, `name`))"; + + public static String expectedBaseTempStagingTableWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`biz_date` DATE," + + "`legend_persistence_count` INTEGER)"; + + public static String expectedBaseTempStagingTableWithVersionAndCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`biz_date` DATE," + + "`digest` VARCHAR(256)," + + "`version` INTEGER," + + "`legend_persistence_count` INTEGER)"; - public static String expectedBaseTempStagingTableWithCount = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`biz_date` DATE," + + "`digest` VARCHAR(256)," + "`legend_persistence_count` INTEGER)"; - public static String expectedBaseTempStagingTablePlusDigestWithCount = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + - "(`id` INTEGER NOT NULL," + - "`name` VARCHAR(256) NOT NULL," + - "`amount` DOUBLE," + - "`biz_date` DATE," + - "`digest` VARCHAR(256)," + - "`legend_persistence_count` INTEGER)"; - - public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + - "(`id` INTEGER NOT NULL," + - "`name` VARCHAR(256) NOT NULL," + - "`amount` DOUBLE," + - "`biz_date` DATE," + - "`digest` 
VARCHAR(256)," + - "`legend_persistence_count` INTEGER," + - "`data_split` INTEGER NOT NULL)"; - - public static String expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + - "`ID` INTEGER NOT NULL," + - "`NAME` VARCHAR(256) NOT NULL," + - "`AMOUNT` DOUBLE," + - "`BIZ_DATE` DATE," + - "`DIGEST` VARCHAR(256)," + - "`VERSION` INTEGER," + - "PRIMARY KEY (`ID`, `NAME`))"; - - public static String expectedBaseTablePlusDigestCreateQueryWithUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + + public static String expectedBaseTempStagingTablePlusDigestWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS `mydb`.`staging_legend_persistence_temp_staging`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`biz_date` DATE," + + "`digest` VARCHAR(256)," + + "`legend_persistence_count` INTEGER," + + "`data_split` INTEGER NOT NULL)"; + + public static String expectedBaseTablePlusDigestPlusVersionCreateQueryUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + + "`ID` INTEGER NOT NULL," + + "`NAME` VARCHAR(256) NOT NULL," + + "`AMOUNT` DOUBLE," + + "`BIZ_DATE` DATE," + + "`DIGEST` VARCHAR(256)," + + "`VERSION` INTEGER," + + "PRIMARY KEY (`ID`, `NAME`))"; + + public static String expectedBaseTablePlusDigestCreateQueryWithUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + "`ID` INTEGER NOT NULL," + "`NAME` VARCHAR(256) NOT NULL," + "`AMOUNT` DOUBLE," + @@ -103,32 +112,32 @@ public class MemsqlTestArtifacts "`DIGEST` VARCHAR(256)," + "PRIMARY KEY (`ID`, `NAME`))"; - public static String expectedBaseTableCreateQueryWithNoPKs = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedBaseTableCreateQueryWithNoPKs = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER," + "`name` VARCHAR(256)," + "`amount` DOUBLE," + "`biz_date` DATE," + "`digest` VARCHAR(256))"; - public static String 
expectedStagingTableCreateQueryWithNoPKs = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging`(" + + public static String expectedStagingTableCreateQueryWithNoPKs = "CREATE TABLE IF NOT EXISTS `mydb`.`staging`(" + "`id` INTEGER," + "`name` VARCHAR(256)," + "`amount` DOUBLE," + "`biz_date` DATE," + "`digest` VARCHAR(256))"; - public static String expectedBaseTableCreateQueryWithAuditAndNoPKs = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBaseTableCreateQueryWithAuditAndNoPKs = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER,`name` VARCHAR(256),`amount` DOUBLE,`biz_date` DATE,`digest` VARCHAR(256),`batch_update_time` DATETIME)"; - public static String expectedMainTableBatchIdAndVersionBasedCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedMainTableBatchIdAndVersionBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL,`name` VARCHAR(256) NOT NULL,`amount` DOUBLE,`biz_date` DATE,`digest` VARCHAR(256),`version` INTEGER," + "`batch_id_in` INTEGER NOT NULL,`batch_id_out` INTEGER,PRIMARY KEY (`id`, `name`, `batch_id_in`))"; - public static String expectedMainTableBatchIdAndVersionBasedCreateQueryWithUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + + public static String expectedMainTableBatchIdAndVersionBasedCreateQueryWithUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + "(`ID` INTEGER NOT NULL,`NAME` VARCHAR(256) NOT NULL,`AMOUNT` DOUBLE,`BIZ_DATE` DATE,`DIGEST` VARCHAR(256),`VERSION` INTEGER,`BATCH_ID_IN` INTEGER NOT NULL," + "`BATCH_ID_OUT` INTEGER,PRIMARY KEY (`ID`, `NAME`, `BATCH_ID_IN`))"; - public static String expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT 
NULL," + "`amount` DOUBLE," + @@ -137,16 +146,16 @@ public class MemsqlTestArtifacts "`batch_update_time` DATETIME NOT NULL," + "PRIMARY KEY (`id`, `name`, `batch_update_time`))"; - public static String expectedBaseTablePlusDigestPlusUpdateTimestampCreateQueryUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + - "`ID` INTEGER NOT NULL," + - "`NAME` VARCHAR(256) NOT NULL," + - "`AMOUNT` DOUBLE," + - "`BIZ_DATE` DATE," + - "`DIGEST` VARCHAR(256)," + - "`BATCH_UPDATE_TIME` DATETIME NOT NULL," + - "PRIMARY KEY (`ID`, `NAME`, `BATCH_UPDATE_TIME`))"; + public static String expectedBaseTablePlusDigestPlusUpdateTimestampCreateQueryUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`(" + + "`ID` INTEGER NOT NULL," + + "`NAME` VARCHAR(256) NOT NULL," + + "`AMOUNT` DOUBLE," + + "`BIZ_DATE` DATE," + + "`DIGEST` VARCHAR(256)," + + "`BATCH_UPDATE_TIME` DATETIME NOT NULL," + + "PRIMARY KEY (`ID`, `NAME`, `BATCH_UPDATE_TIME`))"; - public static String expectedBaseTableWithAuditNotPKCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedBaseTableWithAuditNotPKCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -155,7 +164,7 @@ public class MemsqlTestArtifacts "`batch_update_time` DATETIME," + "PRIMARY KEY (`id`, `name`))"; - public static String expectedBaseTableWithAuditPKCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedBaseTableWithAuditPKCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -171,11 +180,11 @@ public class MemsqlTestArtifacts public static String cleanUpMainTableSql = "DELETE FROM `mydb`.`main` as sink"; public static String cleanupMainTableSqlUpperCase = "DELETE FROM `MYDB`.`MAIN` as sink"; - public static String expectedMainTableBatchIdBasedCreateQuery = "CREATE 
REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedMainTableBatchIdBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL,`name` VARCHAR(256) NOT NULL,`amount` DOUBLE,`biz_date` DATE,`digest` VARCHAR(256)," + "`batch_id_in` INTEGER NOT NULL,`batch_id_out` INTEGER,PRIMARY KEY (`id`, `name`, `batch_id_in`))"; - public static String expectedMetadataTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS batch_metadata" + + public static String expectedMetadataTableCreateQuery = "CREATE TABLE IF NOT EXISTS batch_metadata" + "(`table_name` VARCHAR(255)," + "`batch_start_ts_utc` DATETIME," + "`batch_end_ts_utc` DATETIME," + @@ -183,7 +192,7 @@ public class MemsqlTestArtifacts "`table_batch_id` INTEGER," + "`staging_filters` JSON)"; - public static String expectedMetadataTableCreateQueryWithUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS BATCH_METADATA" + + public static String expectedMetadataTableCreateQueryWithUpperCase = "CREATE TABLE IF NOT EXISTS BATCH_METADATA" + "(`TABLE_NAME` VARCHAR(255)," + "`BATCH_START_TS_UTC` DATETIME," + "`BATCH_END_TS_UTC` DATETIME," + @@ -191,7 +200,7 @@ public class MemsqlTestArtifacts "`TABLE_BATCH_ID` INTEGER," + "`STAGING_FILTERS` JSON)"; - public static String expectedMainTableBatchIdBasedCreateQueryWithUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + + public static String expectedMainTableBatchIdBasedCreateQueryWithUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + "(`ID` INTEGER NOT NULL,`NAME` VARCHAR(256) NOT NULL,`AMOUNT` DOUBLE,`BIZ_DATE` DATE,`DIGEST` VARCHAR(256)," + "`BATCH_ID_IN` INTEGER NOT NULL,`BATCH_ID_OUT` INTEGER,PRIMARY KEY (`ID`, `NAME`, `BATCH_ID_IN`))"; @@ -200,11 +209,11 @@ public class MemsqlTestArtifacts public static String expectedMetadataTableIngestQueryWithUpperCase = "INSERT INTO BATCH_METADATA (`TABLE_NAME`, `TABLE_BATCH_ID`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`)" + " (SELECT 'MAIN',(SELECT 
COALESCE(MAX(BATCH_METADATA.`TABLE_BATCH_ID`),0)+1 FROM BATCH_METADATA as BATCH_METADATA WHERE UPPER(BATCH_METADATA.`TABLE_NAME`) = 'MAIN'),'2000-01-01 00:00:00.000000',CURRENT_TIMESTAMP(),'DONE')"; - + public static String expectedMetadataTableIngestQueryWithPlaceHolders = "INSERT INTO batch_metadata (`table_name`, `table_batch_id`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`) " + "(SELECT 'main',{BATCH_ID_PATTERN},'{BATCH_START_TS_PATTERN}','{BATCH_END_TS_PATTERN}','DONE')"; - public static String expectedMainTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedMainTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -216,7 +225,7 @@ public class MemsqlTestArtifacts "`batch_time_out` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`))"; - public static String expectedMainTableCreateQueryWithUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + + public static String expectedMainTableCreateQueryWithUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + "(`ID` INTEGER NOT NULL," + "`NAME` VARCHAR(256) NOT NULL," + "`AMOUNT` DOUBLE," + @@ -228,15 +237,15 @@ public class MemsqlTestArtifacts "`BATCH_TIME_OUT` DATETIME," + "PRIMARY KEY (`ID`, `NAME`, `BATCH_ID_IN`))"; - public static String expectedMainTableTimeBasedCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedMainTableTimeBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL,`name` VARCHAR(256) NOT NULL,`amount` DOUBLE,`biz_date` DATE,`digest` VARCHAR(256)," + "`batch_time_in` DATETIME NOT NULL,`batch_time_out` DATETIME,PRIMARY KEY (`id`, `name`, `batch_time_in`))"; - public static String expectedMainTableTimeBasedCreateQueryWithUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + + public static String 
expectedMainTableTimeBasedCreateQueryWithUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + "(`ID` INTEGER NOT NULL,`NAME` VARCHAR(256) NOT NULL,`AMOUNT` DOUBLE,`BIZ_DATE` DATE,`DIGEST` VARCHAR(256)," + "`BATCH_TIME_IN` DATETIME NOT NULL,`BATCH_TIME_OUT` DATETIME,PRIMARY KEY (`ID`, `NAME`, `BATCH_TIME_IN`))"; - public static String expectedBitemporalMainTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalMainTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -247,7 +256,7 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalStagingTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging`(" + + public static String expectedBitemporalStagingTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`staging`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -256,7 +265,7 @@ public class MemsqlTestArtifacts "`digest` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`))"; - public static String expectedBitemporalMainTableWithVersionWithBatchIdDatetimeCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalMainTableWithVersionWithBatchIdDatetimeCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -270,7 +279,7 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalMainTableWithVersionBatchDateTimeCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String 
expectedBitemporalMainTableWithVersionBatchDateTimeCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -282,37 +291,37 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_time_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyMainTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalFromOnlyMainTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`digest` VARCHAR(256)," + + "`batch_id_in` INTEGER NOT NULL," + + "`batch_id_out` INTEGER," + + "`validity_from_target` DATETIME NOT NULL," + + "`validity_through_target` DATETIME," + + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; + + public static String expectedBitemporalFromOnlyMainTableWithVersionCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`batch_id_in` INTEGER NOT NULL," + "`batch_id_out` INTEGER," + "`validity_from_target` DATETIME NOT NULL," + "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyMainTableWithVersionCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + - "(`id` INTEGER NOT NULL," + - "`name` VARCHAR(256) NOT NULL," + - "`amount` DOUBLE," + - "`digest` VARCHAR(256)," + - "`version` INTEGER," + - "`batch_id_in` INTEGER NOT NULL," + - "`batch_id_out` INTEGER," + - "`validity_from_target` DATETIME NOT NULL," + - "`validity_through_target` DATETIME," + - "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - - public static String 
expectedBitemporalFromOnlyStagingTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`staging`" + + public static String expectedBitemporalFromOnlyStagingTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`staging`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE,`validity_from_reference` DATETIME NOT NULL," + "`digest` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`))"; - public static String expectedBitemporalFromOnlyMainTableBatchIdAndTimeBasedCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`" + + public static String expectedBitemporalFromOnlyMainTableBatchIdAndTimeBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -325,7 +334,7 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyMainTableDateTimeBasedCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`main`(" + + public static String expectedBitemporalFromOnlyMainTableDateTimeBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`main`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -336,7 +345,7 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_time_in`, `validity_from_target`))"; - public static String expectedBitemporalMainTableCreateQueryUpperCase = "CREATE REFERENCE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + + public static String expectedBitemporalMainTableCreateQueryUpperCase = "CREATE TABLE IF NOT EXISTS `MYDB`.`MAIN`" + "(`ID` INTEGER NOT NULL," + "`NAME` VARCHAR(256) NOT NULL," + "`AMOUNT` DOUBLE," + @@ -347,30 +356,30 @@ public class MemsqlTestArtifacts "`VALIDITY_THROUGH_TARGET` DATETIME," + "PRIMARY KEY (`ID`, `NAME`, `BATCH_ID_IN`, `VALIDITY_FROM_TARGET`))"; - public 
static String expectedBitemporalFromOnlyTempTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`temp`" + + public static String expectedBitemporalFromOnlyTempTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`temp`" + + "(`id` INTEGER NOT NULL," + + "`name` VARCHAR(256) NOT NULL," + + "`amount` DOUBLE," + + "`digest` VARCHAR(256)," + + "`batch_id_in` INTEGER NOT NULL," + + "`batch_id_out` INTEGER," + + "`validity_from_target` DATETIME NOT NULL," + + "`validity_through_target` DATETIME," + + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; + + public static String expectedBitemporalFromOnlyTempTableWithVersionCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`temp`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + "`digest` VARCHAR(256)," + + "`version` INTEGER," + "`batch_id_in` INTEGER NOT NULL," + "`batch_id_out` INTEGER," + "`validity_from_target` DATETIME NOT NULL," + "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyTempTableWithVersionCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`temp`" + - "(`id` INTEGER NOT NULL," + - "`name` VARCHAR(256) NOT NULL," + - "`amount` DOUBLE," + - "`digest` VARCHAR(256)," + - "`version` INTEGER," + - "`batch_id_in` INTEGER NOT NULL," + - "`batch_id_out` INTEGER," + - "`validity_from_target` DATETIME NOT NULL," + - "`validity_through_target` DATETIME," + - "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - - public static String expectedBitemporalFromOnlyTempTableBatchIdAndTimeBasedCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`temp`(" + + public static String expectedBitemporalFromOnlyTempTableBatchIdAndTimeBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`temp`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -383,7 +392,7 @@ public class MemsqlTestArtifacts 
"`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyTempTableDateTimeBasedCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`temp`(" + + public static String expectedBitemporalFromOnlyTempTableDateTimeBasedCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`temp`(" + "`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -394,7 +403,7 @@ public class MemsqlTestArtifacts "`validity_through_target` DATETIME," + "PRIMARY KEY (`id`, `name`, `batch_time_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyStageWithoutDuplicatesTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + + public static String expectedBitemporalFromOnlyStageWithoutDuplicatesTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -402,7 +411,7 @@ public class MemsqlTestArtifacts "`digest` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`))"; - public static String expectedBitemporalFromOnlyTempTableWithDeleteIndicatorCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`tempWithDeleteIndicator`" + + public static String expectedBitemporalFromOnlyTempTableWithDeleteIndicatorCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`tempWithDeleteIndicator`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -414,7 +423,7 @@ public class MemsqlTestArtifacts "`delete_indicator` VARCHAR(256)," + "PRIMARY KEY (`id`, `name`, `batch_id_in`, `validity_from_target`))"; - public static String expectedBitemporalFromOnlyStageWithVersionWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + + public static String 
expectedBitemporalFromOnlyStageWithVersionWithDataSplitWithoutDuplicatesTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -424,7 +433,7 @@ public class MemsqlTestArtifacts "`data_split` BIGINT NOT NULL," + "PRIMARY KEY (`id`, `name`, `validity_from_reference`, `data_split`))"; - public static String expectedBitemporalFromOnlyStageWithDeleteIndicatorWithoutDuplicatesTableCreateQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + + public static String expectedBitemporalFromOnlyStageWithDeleteIndicatorWithoutDuplicatesTableCreateQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`stagingWithoutDuplicates`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -443,39 +452,43 @@ public class MemsqlTestArtifacts "as stage WHERE stage.`legend_persistence_rank` = 1)"; public static String expectedInsertIntoBaseTempStagingPlusDigestWithFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + - "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + - "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`)"; + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`)"; public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, 
`legend_persistence_count`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + - "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM " + - "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " + - "WHERE stage.`legend_persistence_rank` = 1)"; + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + + "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM " + + "`mydb`.`staging` as stage GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage) as stage " + + "WHERE stage.`legend_persistence_rank` = 1)"; public static String expectedInsertIntoBaseTempStagingPlusDigestWithAllVersionAndFilterDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, `data_split`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as 
`legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + - "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + - "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; + "(`id`, `name`, `amount`, `biz_date`, `digest`, `legend_persistence_count`, `data_split`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` ASC) as `data_split` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, stage.`digest`) as stage)"; public static String maxDupsErrorCheckSql = "SELECT MAX(stage.`legend_persistence_count`) as `MAX_DUPLICATES` FROM " + "`mydb`.`staging_legend_persistence_temp_staging` as stage"; - public static String dataErrorCheckSql = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + + public static String dataErrorCheckSqlForBizDateAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `biz_date`) as stage"; + public static String dataErrorCheckSqlForVersionAsVersion = "SELECT MAX(`legend_persistence_distinct_rows`) as `MAX_DATA_ERRORS` FROM " + + "(SELECT COUNT(DISTINCT(`digest`)) as `legend_persistence_distinct_rows` FROM " + + "`mydb`.`staging_legend_persistence_temp_staging` as stage GROUP BY `id`, `name`, `version`) as stage"; + public static String 
expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + - "(`id`, `name`, `amount`, `biz_date`, `digest`) " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + - "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + - "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + - "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,DENSE_RANK() " + + "OVER (PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`biz_date` DESC) as `legend_persistence_rank` " + + "FROM `mydb`.`staging` as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; public static String expectedTempStagingCleanupQueryInUpperCase = "DELETE FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage"; public static String expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicatesUpperCase = "INSERT INTO `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` " + @@ -489,4 +502,4 @@ public class MemsqlTestArtifacts public static String dataErrorCheckSqlUpperCase = "SELECT MAX(`LEGEND_PERSISTENCE_DISTINCT_ROWS`) as `MAX_DATA_ERRORS` " + "FROM (SELECT COUNT(DISTINCT(`DIGEST`)) as `LEGEND_PERSISTENCE_DISTINCT_ROWS` " + "FROM `MYDB`.`STAGING_LEGEND_PERSISTENCE_TEMP_STAGING` as stage GROUP BY `ID`, `NAME`, `BIZ_DATE`) as stage"; -} +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java index e5daeed87f7..84a76261b96 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaTest.java @@ -326,6 +326,36 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); } + @Override + public void verifyNontemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + + String updateSql = "UPDATE `mydb`.`main` as sink " + + "INNER JOIN " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage " + + "WHERE (stage.`biz_date` > '2020-01-10') OR ((stage.`biz_date` > '2020-01-01') AND (stage.`biz_date` < '2020-01-05'))) as stage " + + "ON ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`) " + + "SET sink.`id` = stage.`id`,sink.`name` = stage.`name`,sink.`amount` = stage.`amount`,sink.`biz_date` = stage.`biz_date`,sink.`digest` = stage.`digest`"; + + String insertSql = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest` FROM `mydb`.`staging` as stage WHERE (NOT (EXISTS " + + "(SELECT 
* FROM `mydb`.`main` as sink WHERE (sink.`id` = stage.`id`) AND " + + "(sink.`name` = stage.`name`)))) AND ((stage.`biz_date` > '2020-01-10') OR ((stage.`biz_date` > '2020-01-01') AND (stage.`biz_date` < '2020-01-05'))))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); + Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + + String incomingRecordCount = "SELECT COUNT(*) as `incomingRecordCount` FROM `mydb`.`staging` as stage WHERE (stage.`biz_date` > '2020-01-10') OR ((stage.`biz_date` > '2020-01-01') AND (stage.`biz_date` < '2020-01-05'))"; + // Stats + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + @Override public void verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java index 5719e415d13..3eb99c652fa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdBasedTest.java @@ -21,9 +21,10 @@ import org.finos.legend.engine.persistence.components.testcases.ingestmode.unitemporal.UnitmemporalDeltaBatchIdBasedTestCases; import org.junit.jupiter.api.Assertions; -import java.util.ArrayList; import java.util.List; +import static org.finos.legend.engine.persistence.components.common.DedupAndVersionErrorStatistics.MAX_DATA_ERRORS; + public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @Override @@ -317,6 +318,35 @@ public void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithStagingFilters("{\"batch_id_in\":{\"GT\":5}}"), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink SET sink.`batch_id_out` = " + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + + "WHERE (sink.`batch_id_out` = 999999999) AND (EXISTS (SELECT * FROM `mydb`.`staging` as stage " + + "WHERE (((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (sink.`digest` <> stage.`digest`)) " + + "AND (stage.`batch_id_in` > 5)))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `batch_id_in`, `batch_id_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "(SELECT 
COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE " + + "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging` as stage " + + "WHERE (NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) " + + "AND (sink.`digest` = stage.`digest`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`))))) " + + "AND (stage.`batch_id_in` > 5))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations) { @@ -339,12 +369,74 @@ public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(Gene Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + 
"stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + + "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_rank` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "stage.`version`,COUNT(*) as `legend_persistence_count` FROM `mydb`.`staging` as stage " + + "WHERE stage.`batch_id_in` > 5 GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, " + + "stage.`digest`, stage.`version`) as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithStagingFilters("{\"batch_id_in\":{\"GT\":5}}"), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata " + + "WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 WHERE (sink.`batch_id_out` = 999999999) AND " + + "(EXISTS (SELECT * FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE 
((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)) AND (stage.`version` > sink.`version`)))"; + + String expectedUpsertQuery = "INSERT INTO `mydb`.`main` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `batch_id_in`, `batch_id_out`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "(SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE " + + "UPPER(batch_metadata.`table_name`) = 'MAIN'),999999999 FROM `mydb`.`staging_legend_persistence_temp_staging` as stage " + + "WHERE NOT (EXISTS (SELECT * FROM `mydb`.`main` as sink WHERE (sink.`batch_id_out` = 999999999) AND " + + "(stage.`version` <= sink.`version`) AND ((sink.`id` = stage.`id`) AND (sink.`name` = stage.`name`)))))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdAndVersionBasedCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedBaseTempStagingTableWithVersionAndCount, preActionsSql.get(2)); + + String expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters = "INSERT INTO `mydb`.`staging_legend_persistence_temp_staging` " + + "(`id`, `name`, `amount`, `biz_date`, `digest`, `version`, `legend_persistence_count`) " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "stage.`legend_persistence_count` as `legend_persistence_count` FROM " + + "(SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`,stage.`version`," + + "stage.`legend_persistence_count` as `legend_persistence_count`,DENSE_RANK() OVER " + + "(PARTITION BY stage.`id`,stage.`name` ORDER BY stage.`version` DESC) as `legend_persistence_rank` " + + "FROM (SELECT stage.`id`,stage.`name`,stage.`amount`,stage.`biz_date`,stage.`digest`," + + "stage.`version`,COUNT(*) as 
`legend_persistence_count` FROM `mydb`.`staging` as stage " + + "WHERE stage.`batch_id_in` > 5 GROUP BY stage.`id`, stage.`name`, stage.`amount`, stage.`biz_date`, " + + "stage.`digest`, stage.`version`) as stage) as stage WHERE stage.`legend_persistence_rank` = 1)"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, operations.deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(expectedInsertIntoBaseTempStagingWithMaxVersionFilterDupsWithStagingFilters, operations.deduplicationAndVersioningSql().get(1)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForVersionAsVersion, operations.deduplicationAndVersioningErrorChecksSql().get(MAX_DATA_ERRORS)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java index 02a21455a2b..3bedbebbb25 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalDeltaBatchIdDateTimeBasedTest.java @@ -315,7 +315,7 @@ public void verifyUnitemporalDeltaWithOnlySchemaSet(GeneratorResult operations) List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedCreateMainTableQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `my_schema`.`main`" + + String expectedCreateMainTableQuery = "CREATE TABLE IF NOT EXISTS `my_schema`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -360,7 +360,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothSet(GeneratorResult operati List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedCreateMainTableQuery = "CREATE REFERENCE TABLE IF NOT EXISTS `mydb`.`my_schema`.`main`" + + String expectedCreateMainTableQuery = "CREATE TABLE IF NOT EXISTS `mydb`.`my_schema`.`main`" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + @@ -405,7 +405,7 @@ public void verifyUnitemporalDeltaWithDbAndSchemaBothNotSet(GeneratorResult oper List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedCreateMainTableQuery = "CREATE REFERENCE TABLE IF NOT EXISTS main" + + String expectedCreateMainTableQuery = "CREATE TABLE IF NOT EXISTS main" + "(`id` INTEGER NOT NULL," + "`name` VARCHAR(256) NOT NULL," + "`amount` DOUBLE," + diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 2112ff4dedc..a793c829593 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -93,7 +93,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDedupMaxVersion(Generator Assertions.assertEquals(MemsqlTestArtifacts.expectedTempStagingCleanupQuery, deduplicationAndVersioningSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndAllowDuplicates, deduplicationAndVersioningSql.get(1)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index 0f96eeb95b6..ab72f14d23e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -98,7 +98,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionFailOnDupsMaxVersion(Genera Assertions.assertEquals(MemsqlTestArtifacts.expectedInsertIntoBaseTempStagingPlusDigestWithMaxVersionAndFilterDuplicates, deduplicationAndVersioningSql.get(1)); Assertions.assertEquals(MemsqlTestArtifacts.maxDupsErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DUPLICATES)); - Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSql, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); + Assertions.assertEquals(MemsqlTestArtifacts.dataErrorCheckSqlForBizDateAsVersion, deduplicationAndVersioningErrorChecksSql.get(DedupAndVersionErrorStatistics.MAX_DATA_ERRORS)); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/BaseTestUtils.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/BaseTestUtils.java index cd88eb60ad8..9dd725f2d3e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/BaseTestUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/BaseTestUtils.java @@ -46,25 +46,25 @@ public class BaseTestUtils public static Index index = Index.builder().indexName("my_idx").addAllColumns(Arrays.asList("col_int")).build(); public static SchemaDefinition schemaWithAllColumns = SchemaDefinition.builder() - .addFields(colInt) - .addFields(colInteger) - .addFields(colBigint) - .addFields(colTinyint) - .addFields(colSmallint) - .addFields(colChar) - .addFields(colVarchar) - .addFields(colTimestamp) - .addFields(colDatetime) - .addFields(colDate) - .addFields(colReal) - .addFields(colFloat) - .addFields(colDecimal) - .addFields(colDouble) - .addFields(colTime) - .addFields(colBoolean) - .build(); + .addFields(colInt) + .addFields(colInteger) + .addFields(colBigint) + .addFields(colTinyint) + .addFields(colSmallint) + .addFields(colChar) + .addFields(colVarchar) + .addFields(colTimestamp) + .addFields(colDatetime) + .addFields(colDate) + .addFields(colReal) + .addFields(colFloat) + .addFields(colDecimal) + .addFields(colDouble) + .addFields(colTime) + .addFields(colBoolean) + .build(); - public static SchemaDefinition schemaWithAllColumnsWithShardColumnStore = SchemaDefinition.builder() + public static SchemaDefinition schemaWithAllColumnsWithShardsAndColumnStore = SchemaDefinition.builder() 
.addFields(colInt) .addFields(colInteger) .addFields(colBigint) @@ -72,14 +72,23 @@ public class BaseTestUtils .shardSpecification(ShardSpecification.builder().addShardKeys(colInt).build()) .build(); - public static SchemaDefinition schemaWithColumnStoreWithoutShard = SchemaDefinition.builder() + public static SchemaDefinition schemaWithColumnStoreUnsharded = SchemaDefinition.builder() + .addFields(colInt) + .addFields(colInteger) + .addFields(colBigint) + .columnStoreSpecification(ColumnStoreSpecification.builder().columnStore(true).addColumnStoreKeys(colInt).build()) + .shardSpecification(ShardSpecification.builder().isSharded(false).build()) + .build(); + + public static SchemaDefinition schemaWithColumnStoreWithKeylessSharding = SchemaDefinition.builder() .addFields(colInt) .addFields(colInteger) .addFields(colBigint) .columnStoreSpecification(ColumnStoreSpecification.builder().columnStore(true).addColumnStoreKeys(colInt).build()) + .shardSpecification(ShardSpecification.builder().isSharded(true).build()) .build(); - public static SchemaDefinition schemaWithRowStoreWithoutShard = SchemaDefinition.builder() + public static SchemaDefinition schemaWithRowStoreWithoutShardDefinition = SchemaDefinition.builder() .addFields(colInt) .addFields(colInteger) .addFields(colBigint) @@ -94,25 +103,25 @@ public class BaseTestUtils .build(); public static SchemaDefinition schemaWithColumnStore = SchemaDefinition.builder() - .addFields(colInt) - .addFields(colInteger) - .addFields(colBigint) - .addFields(colTinyint) - .addFields(colSmallint) - .addFields(colChar) - .addFields(colVarchar) - .addFields(colTimestamp) - .addFields(colDatetime) - .addFields(colDate) - .addFields(colReal) - .addFields(colFloat) - .addFields(colDecimal) - .addFields(colDouble) - .addFields(colTime) - .addFields(colBoolean) - .columnStoreSpecification(ColumnStoreSpecification.builder() - .columnStore(true) - .addColumnStoreKeys(colBigint) - .build()) - .build(); -} + .addFields(colInt) + 
.addFields(colInteger) + .addFields(colBigint) + .addFields(colTinyint) + .addFields(colSmallint) + .addFields(colChar) + .addFields(colVarchar) + .addFields(colTimestamp) + .addFields(colDatetime) + .addFields(colDate) + .addFields(colReal) + .addFields(colFloat) + .addFields(colDecimal) + .addFields(colDouble) + .addFields(colTime) + .addFields(colBoolean) + .columnStoreSpecification(ColumnStoreSpecification.builder() + .columnStore(true) + .addColumnStoreKeys(colBigint) + .build()) + .build(); +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTest.java index bbcbbee8715..5acc259ef77 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTest.java @@ -30,26 +30,21 @@ import java.util.List; import java.util.Optional; -import static org.finos.legend.engine.persistence.components.logicalplan.operations.BaseTestUtils.schemaWithAllColumnsWithShardColumnStore; -import static org.finos.legend.engine.persistence.components.logicalplan.operations.BaseTestUtils.schemaWithColumnStoreWithoutShard; -import static org.finos.legend.engine.persistence.components.logicalplan.operations.BaseTestUtils.schemaWithRowStoreWithoutShard; -import static 
org.finos.legend.engine.persistence.components.logicalplan.operations.BaseTestUtils.schemaWithRowStoreShards; -import static org.finos.legend.engine.persistence.components.logicalplan.operations.BaseTestUtils.schemaWithAllColumns; -import static org.finos.legend.engine.persistence.components.logicalplan.operations.BaseTestUtils.schemaWithColumnStore; +import static org.finos.legend.engine.persistence.components.logicalplan.operations.BaseTestUtils.*; public class CreateTest { @Test - public void testCreateTableWithColumnStoreShards() + public void testCreateTableWithColumnStoreSharded() { DatasetDefinition dataset = DatasetDefinition.builder() - .database("my_db") - .group("my_schema") - .name("my_table") - .alias("my_alias") - .schema(schemaWithAllColumnsWithShardColumnStore) - .build(); + .database("my_db") + .group("my_schema") + .name("my_table") + .alias("my_alias") + .schema(schemaWithAllColumnsWithShardsAndColumnStore) + .build(); Operation createTable = Create.of(true,dataset); @@ -65,14 +60,14 @@ public void testCreateTableWithColumnStoreShards() } @Test - public void testCreateTableWithColumnStoreWithoutShards() + public void testCreateTableWithColumnStoreUnsharded() { DatasetDefinition dataset = DatasetDefinition.builder() .database("my_db") .group("my_schema") .name("my_table") .alias("my_alias") - .schema(schemaWithColumnStoreWithoutShard) + .schema(schemaWithColumnStoreUnsharded) .build(); Operation createTable = Create.of(true,dataset); @@ -89,14 +84,14 @@ public void testCreateTableWithColumnStoreWithoutShards() } @Test - public void testCreateTableWithRowStoreWithoutShards() + public void testCreateTableWithColumnStoreWithKeylessSharding() { DatasetDefinition dataset = DatasetDefinition.builder() .database("my_db") .group("my_schema") .name("my_table") .alias("my_alias") - .schema(schemaWithRowStoreWithoutShard) + .schema(schemaWithColumnStoreWithKeylessSharding) .build(); Operation createTable = Create.of(true,dataset); @@ -107,7 +102,31 @@ public 
void testCreateTableWithRowStoreWithoutShards() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedAdd = "CREATE REFERENCE TABLE IF NOT EXISTS `my_db`.`my_schema`.`my_table`(`col_int` INTEGER NOT NULL PRIMARY KEY,`col_integer` INTEGER NOT NULL UNIQUE,`col_bigint` BIGINT,INDEX `my_idx` (`col_int`))"; + String expectedAdd = "CREATE TABLE IF NOT EXISTS `my_db`.`my_schema`.`my_table`(`col_int` INTEGER NOT NULL PRIMARY KEY,`col_integer` INTEGER NOT NULL UNIQUE,`col_bigint` BIGINT,KEY CLUSTERED_COLUMN_INDEX (`col_int`) USING CLUSTERED COLUMNSTORE)"; + + Assertions.assertEquals(expectedAdd, list.get(0)); + } + + @Test + public void testCreateTableWithRowStoreWithoutShardDefinition() + { + DatasetDefinition dataset = DatasetDefinition.builder() + .database("my_db") + .group("my_schema") + .name("my_table") + .alias("my_alias") + .schema(schemaWithRowStoreWithoutShardDefinition) + .build(); + + Operation createTable = Create.of(true,dataset); + + RelationalTransformer transformer = new RelationalTransformer(MemSqlSink.get()); + + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(createTable).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + + String expectedAdd = "CREATE TABLE IF NOT EXISTS `my_db`.`my_schema`.`my_table`(`col_int` INTEGER NOT NULL PRIMARY KEY,`col_integer` INTEGER NOT NULL UNIQUE,`col_bigint` BIGINT,INDEX `my_idx` (`col_int`))"; Assertions.assertEquals(expectedAdd, list.get(0)); } @@ -141,12 +160,12 @@ public void testCreateTableWithRowStoreWithShards() public void testAlterTableWithUpperCase() { DatasetDefinition dataset = DatasetDefinition.builder() - .database("my_db") - .group("my_schema") - .name("my_table") - .alias("my_alias") - .schema(schemaWithAllColumns) - .build(); + .database("my_db") + .group("my_schema") + .name("my_table") + .alias("my_alias") + .schema(schemaWithAllColumns) + .build(); 
Field column = Field.builder().name("column").type(FieldType.of(DataType.VARCHAR, 64, null)).nullable(false).build(); Field newColumn = Field.builder().name("column1").type(FieldType.of(DataType.VARCHAR, 64, null)).nullable(false).build(); @@ -159,8 +178,8 @@ public void testAlterTableWithUpperCase() LogicalPlan logicalPlan = LogicalPlan.builder().addOps(add, changeDatatype, nullableColumn, dropColumn, renameColumn).build(); RelationalTransformer transformer = new RelationalTransformer( - MemSqlSink.get(), - TransformOptions.builder().addOptimizers(new UpperCaseOptimizer()).build()); + MemSqlSink.get(), + TransformOptions.builder().addOptimizers(new UpperCaseOptimizer()).build()); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); @@ -182,11 +201,11 @@ public void testAlterTableWithUpperCase() public void testAlterTableChangeDataTypeAndColumnStore() { DatasetDefinition dataset = DatasetDefinition.builder() - .database("my_db") - .group("my_schema") - .name("my_table") - .schema(schemaWithColumnStore) - .build(); + .database("my_db") + .group("my_schema") + .name("my_table") + .schema(schemaWithColumnStore) + .build(); Field column = Field.builder().name("column").type(FieldType.of(DataType.VARCHAR, 64, null)).nullable(false).build(); Operation changeDataType = Alter.of(dataset, Alter.AlterOperation.CHANGE_DATATYPE, column, Optional.empty()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java index e7b46bf87d8..90af5d7706a 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java @@ -40,7 +40,7 @@ public void testShowTablesCommand() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW TABLES FROM CITIBIKE.public LIKE 'trips'"; + String expected = "SHOW TABLES FROM `CITIBIKE`.`public` LIKE 'trips'"; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } @@ -54,7 +54,7 @@ public void testShowTablesCommandWithUpperCase() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW TABLES FROM CITIBIKE.PUBLIC LIKE 'TRIPS'"; + String expected = "SHOW TABLES FROM `CITIBIKE`.`PUBLIC` LIKE 'TRIPS'"; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } @@ -82,7 +82,7 @@ public void testShowTablesCommandWithSchemaWithoutDb() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW TABLES FROM public LIKE 'trips'"; + String expected = "SHOW TABLES FROM `public` LIKE 'trips'"; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } @@ -110,7 +110,7 @@ public void testShowColumnsCommand() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW COLUMNS FROM trips IN CITIBIKE.public"; + String expected = "SHOW COLUMNS FROM `trips` IN `CITIBIKE`.`public`"; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } @@ -124,7 +124,7 @@ public void 
testShowColumnsCommandWithoutSchema() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW COLUMNS FROM trips"; + String expected = "SHOW COLUMNS FROM `trips`"; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } @@ -138,7 +138,7 @@ public void testShowColumnsCommandWithSchemaWithoutDb() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW COLUMNS FROM trips IN public"; + String expected = "SHOW COLUMNS FROM `trips` IN `public`"; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/FileFormat.java similarity index 82% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/FileFormat.java index 75cf32a3a55..afd3b7148dd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/FileFormat.java @@ -12,12 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.common; +package org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets; -public enum FileFormat +public interface FileFormat { - CSV, - JSON, - AVRO, - PARQUET; + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java index c3f190f6b2e..751698e6ad2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java @@ -18,7 +18,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; -import 
java.util.List; +import java.util.Map; import java.util.Optional; @Value.Immutable @@ -33,5 +33,7 @@ public interface SnowflakeStagedFilesDatasetPropertiesAbstract extends StagedFil { String location(); - Optional fileFormat(); + Optional fileFormat(); + + Map copyOptions(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/StandardFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/StandardFileFormatAbstract.java new file mode 100644 index 00000000000..a117e6ecbb4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/StandardFileFormatAbstract.java @@ -0,0 +1,35 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets; + +import org.finos.legend.engine.persistence.components.common.FileFormatType; +import org.immutables.value.Value; + +import java.util.Map; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface StandardFileFormatAbstract extends FileFormat +{ + FileFormatType formatType(); + + Map formatOptions(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/common/ExternalVolume.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/UserDefinedFileFormatAbstract.java similarity index 59% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/common/ExternalVolume.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/UserDefinedFileFormatAbstract.java index ca942ee9a5c..b55a25112be 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/common/ExternalVolume.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/UserDefinedFileFormatAbstract.java @@ -12,22 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.common; -import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; +package org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets; -public class ExternalVolume implements SqlGen -{ - private String externalVolume; - - public ExternalVolume(String externalVolume) - { - this.externalVolume = externalVolume; - } +import org.immutables.value.Value; - @Override - public void genSql(StringBuilder builder) - { - builder.append(String.format("with EXTERNAL_VOLUME = '%s'", externalVolume)); - } +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface UserDefinedFileFormatAbstract extends FileFormat +{ + @Value.Parameter(order = 0) + String formatName(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/optmizer/StringCaseOptimizer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/optmizer/StringCaseOptimizer.java index 3b57ef53797..f2cb3de201e 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/optmizer/StringCaseOptimizer.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/optmizer/StringCaseOptimizer.java @@ -46,8 +46,8 @@ public PhysicalPlanNode optimize(PhysicalPlanNode component) else if (component instanceof ShowCommand) { ShowCommand command = (ShowCommand) component; - command.setSchemaName(applyCase(command.getSchemaName())); - command.setDatabaseName(applyCase(command.getDatabaseName())); + command.setSchemaName(applyCase(command.getSchemaName().orElse(null))); + command.setDatabaseName(applyCase(command.getDatabaseName().orElse(null))); command.setTableName(applyCase(command.getTableName())); return command; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java index e77fd16878e..c476b8ec0c7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/CopyVisitor.java @@ -17,26 +17,66 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.FileFormat; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.UserDefinedFileFormat; import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.statements.CopyStatement; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Optional; public class CopyVisitor implements LogicalPlanVisitor { - @Override public VisitorResult visit(PhysicalPlanNode prev, Copy current, VisitorContext context) { + SnowflakeStagedFilesDatasetProperties properties = (SnowflakeStagedFilesDatasetProperties) current.stagedFilesDatasetProperties(); CopyStatement copyStatement = new CopyStatement(); + setCopyStatementProperties(properties, copyStatement); prev.push(copyStatement); List logicalPlanNodes = new ArrayList<>(); logicalPlanNodes.add(current.sourceDataset()); logicalPlanNodes.add(current.targetDataset()); 
logicalPlanNodes.addAll(current.fields()); + return new VisitorResult(copyStatement, logicalPlanNodes); } + + private static void setCopyStatementProperties(SnowflakeStagedFilesDatasetProperties properties, CopyStatement copyStatement) + { + copyStatement.setFilePatterns(properties.filePatterns()); + copyStatement.setFilePaths(properties.filePaths()); + + // Add default option into the map + Map copyOptions = new HashMap<>(properties.copyOptions()); + if (!copyOptions.containsKey("ON_ERROR") && !copyOptions.containsKey("on_error")) + { + copyOptions.put("ON_ERROR", "ABORT_STATEMENT"); + } + copyStatement.setCopyOptions(copyOptions); + + Optional fileFormat = properties.fileFormat(); + if (fileFormat.isPresent()) + { + FileFormat format = properties.fileFormat().get(); + if (format instanceof UserDefinedFileFormat) + { + UserDefinedFileFormat userDefinedFileFormat = (UserDefinedFileFormat) format; + copyStatement.setUserDefinedFileFormatName(userDefinedFileFormat.formatName()); + } + else if (format instanceof StandardFileFormat) + { + StandardFileFormat standardFileFormat = (StandardFileFormat) format; + copyStatement.setFileFormatType(standardFileFormat.formatType()); + copyStatement.setFileFormatOptions(standardFileFormat.formatOptions()); + } + } + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DatasetAdditionalPropertiesVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DatasetAdditionalPropertiesVisitor.java index 2f3cf631879..aca8b6e2f39 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DatasetAdditionalPropertiesVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DatasetAdditionalPropertiesVisitor.java @@ -14,9 +14,9 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.IcebergProperties; import org.finos.legend.engine.persistence.components.logicalplan.datasets.TableOrigin; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; -import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.common.ExternalVolume; import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.tabletypes.IcebergTableType; import org.finos.legend.engine.persistence.components.relational.sqldom.tabletypes.TableType; @@ -39,9 +39,13 @@ protected void handleTags(PhysicalPlanNode prev, Map tags) prev.push(tags); } - protected void handleExternalVolume(PhysicalPlanNode prev, String externalVolume) + protected void handleIcebergProperties(PhysicalPlanNode prev, IcebergProperties icebergProperties) { - prev.push(new ExternalVolume(externalVolume)); + prev.push(new org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.common.IcebergProperties( + icebergProperties.catalog(), + icebergProperties.externalVolume(), + icebergProperties.baseLocation() + )); } private TableType mapTableType(TableOrigin tableOrigin) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/ShowVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/ShowVisitor.java index c618d5cc387..f262b998355 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/ShowVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/ShowVisitor.java @@ -28,31 +28,13 @@ public class ShowVisitor implements LogicalPlanVisitor @Override public VisitorResult visit(PhysicalPlanNode prev, Show current, VisitorContext context) { - ShowCommand command; - if (current.dataset().datasetReference().database().isPresent() && current.dataset().datasetReference().group().isPresent()) - { - command = new ShowCommand( - ShowType.valueOf(current.operation().name()), - current.dataset().datasetReference().database().get(), - current.dataset().datasetReference().group().get(), - current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new)); - } - else if (current.dataset().datasetReference().group().isPresent()) - { - command = new ShowCommand( - ShowType.valueOf(current.operation().name()), - null, - current.dataset().datasetReference().group().get(), - current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new)); - } - else - { - command = new ShowCommand( - 
ShowType.valueOf(current.operation().name()), - null, - null, - current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new)); - } + ShowCommand command = new ShowCommand( + ShowType.valueOf(current.operation().name()), + current.dataset().datasetReference().database(), + current.dataset().datasetReference().group(), + current.dataset().datasetReference().name().orElseThrow(IllegalStateException::new), + context.quoteIdentifier()); + for (Optimizer optimizer : context.optimizers()) { command = (ShowCommand) optimizer.optimize(command); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetReferenceVisitor.java index ccb15d743cb..286d60ae4af 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -34,8 +34,6 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu } SnowflakeStagedFilesDatasetProperties datasetProperties = (SnowflakeStagedFilesDatasetProperties) current.properties(); StagedFilesTable stagedFiles = new StagedFilesTable(datasetProperties.location()); - 
datasetProperties.fileFormat().ifPresent(stagedFiles::setFileFormat); - stagedFiles.setFilePattern(datasetProperties.files().stream().map(s -> '(' + s + ')').collect(Collectors.joining("|"))); current.alias().ifPresent(stagedFiles::setAlias); prev.push(stagedFiles); return new VisitorResult(null); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/common/IcebergProperties.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/common/IcebergProperties.java new file mode 100644 index 00000000000..f0712f8c703 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/common/IcebergProperties.java @@ -0,0 +1,54 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.common; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; +import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; + +public class IcebergProperties implements SqlGen +{ + private final String catalog; + private final String externalVolume; + private final String baseLocation; + + private static final String CATALOG = "CATALOG"; + private static final String EXTERNAL_VOLUME = "EXTERNAL_VOLUME"; + private static final String BASE_LOCATION = "BASE_LOCATION"; + + public IcebergProperties(String catalog, String externalVolume, String baseLocation) + { + this.catalog = catalog; + this.externalVolume = externalVolume; + this.baseLocation = baseLocation; + } + + @Override + public void genSql(StringBuilder builder) + { + builder.append(CATALOG); + builder.append(SqlGenUtils.ASSIGNMENT_OPERATOR); + builder.append(SqlGenUtils.singleQuote(catalog)); + builder.append(SqlGenUtils.COMMA + SqlGenUtils.WHITE_SPACE); + + builder.append(EXTERNAL_VOLUME); + builder.append(SqlGenUtils.ASSIGNMENT_OPERATOR); + builder.append(SqlGenUtils.singleQuote(externalVolume)); + builder.append(SqlGenUtils.COMMA + SqlGenUtils.WHITE_SPACE); + + builder.append(BASE_LOCATION); + builder.append(SqlGenUtils.ASSIGNMENT_OPERATOR); + builder.append(SqlGenUtils.singleQuote(baseLocation)); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/expressions/table/StagedFilesTable.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/expressions/table/StagedFilesTable.java index 
2bb2eac9f63..946183301ca 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/expressions/table/StagedFilesTable.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/expressions/table/StagedFilesTable.java @@ -18,16 +18,9 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.TableLike; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.StringUtils; -import java.util.ArrayList; -import java.util.List; - -import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.*; - public class StagedFilesTable extends TableLike { private String location; - private String fileFormat; - private String filePattern; public StagedFilesTable(String location) { @@ -53,22 +46,6 @@ public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException { validate(); builder.append(location); - // Add FILE_FORMAT, PATTERN - if (StringUtils.notEmpty(fileFormat) || StringUtils.notEmpty(filePattern)) - { - builder.append(WHITE_SPACE + OPEN_PARENTHESIS); - List options = new ArrayList<>(); - if (StringUtils.notEmpty(fileFormat)) - { - options.add(String.format("FILE_FORMAT => '%s'", fileFormat)); - } - if (StringUtils.notEmpty(filePattern)) - { - options.add(String.format("PATTERN => '%s'", filePattern)); - } - builder.append(String.join(COMMA + WHITE_SPACE, options)); - builder.append(CLOSING_PARENTHESIS); - } } @Override @@ -84,14 +61,4 @@ void validate() throws SqlDomException throw new SqlDomException("location is mandatory"); } } - - public void setFileFormat(String fileFormat) - { - this.fileFormat = 
fileFormat; - } - - public void setFilePattern(String filePattern) - { - this.filePattern = filePattern; - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java index 04480fb8659..6560bbc0edb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java @@ -14,16 +14,23 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.statements; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DMLStatement; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.SelectStatement; import 
org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Field; +import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; +import org.finos.legend.engine.persistence.components.relational.sqldom.utils.StringUtils; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.ASSIGNMENT_OPERATOR; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; @@ -34,10 +41,16 @@ public class CopyStatement implements DMLStatement private Table table; private final List columns; private SelectStatement selectStatement; + private List filePatterns; + private List filePaths; + private String userDefinedFileFormatName; + private FileFormatType fileFormatType; + private Map fileFormatOptions; + private Map copyOptions; public CopyStatement() { - columns = new ArrayList<>(); + this.columns = new ArrayList<>(); } public CopyStatement(Table table, List columns, SelectStatement selectStatement) @@ -49,12 +62,16 @@ public CopyStatement(Table table, List columns, SelectStatement selectSta /* Copy GENERIC PLAN for Snowflake: - COPY INTO [.] (COLUMN_LIST) - FROM - ( SELECT [.]$[:] [ , [.]$[:] , ... ] - FROM { | } - [ ( FILE_FORMAT => '.', PATTERN => '' ) ] [ ] ) - on_error = 'ABORT_STATEMENT' + -------------------------------- + COPY INTO [.] (COLUMN_LIST) + FROM + ( SELECT [.]$[.] [ , [.]$[.] ... ] + FROM { internalStage | externalStage } ) + [ FILES = ( '' [ , '' ] [ , ... 
] ) ] + [ PATTERN = '' ] + [ FILE_FORMAT = ( { FORMAT_NAME = '[.]' | + TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] } ) ] + [ copyOptions ] */ @Override @@ -88,8 +105,67 @@ public void genSql(StringBuilder builder) throws SqlDomException selectStatement.genSql(builder); builder.append(CLOSING_PARENTHESIS); - builder.append(WHITE_SPACE); - builder.append("on_error = 'ABORT_STATEMENT'"); + // File Paths + if (filePaths != null && !filePaths.isEmpty()) + { + String filePathsStr = filePaths.stream().map(path -> SqlGenUtils.singleQuote(path)).collect(Collectors.joining(", ")); + builder.append(String.format(" FILES = (%s)", filePathsStr)); + } + // File Patterns + else if (filePatterns != null && !filePatterns.isEmpty()) + { + String filePatternStr = filePatterns.stream().map(s -> '(' + s + ')').collect(Collectors.joining("|")); + builder.append(String.format(" PATTERN = '%s'", filePatternStr)); + } + + // FILE_FORMAT + if (StringUtils.notEmpty(userDefinedFileFormatName)) + { + builder.append(String.format(" FILE_FORMAT = (FORMAT_NAME = '%s')", userDefinedFileFormatName)); + } + else if (fileFormatType != null) + { + builder.append(" FILE_FORMAT = "); + builder.append(OPEN_PARENTHESIS); + fileFormatOptions = new HashMap<>(fileFormatOptions); + fileFormatOptions.put("TYPE", fileFormatType.name()); + addOptions(fileFormatOptions, builder); + builder.append(CLOSING_PARENTHESIS); + } + + // Add copy Options + if (copyOptions != null && !copyOptions.isEmpty()) + { + builder.append(WHITE_SPACE); + addOptions(copyOptions, builder); + } + } + + + private void addOptions(Map options, StringBuilder builder) + { + if (options != null && options.size() > 0) + { + int ctr = 0; + for (String option : options.keySet().stream().sorted().collect(Collectors.toList())) + { + ctr++; + builder.append(option); + builder.append(WHITE_SPACE + ASSIGNMENT_OPERATOR + WHITE_SPACE); + if (options.get(option) instanceof String) + { + 
builder.append(SqlGenUtils.singleQuote(options.get(option))); + } + else + { + builder.append(options.get(option)); + } + if (ctr < options.size()) + { + builder.append(COMMA + WHITE_SPACE); + } + } + } } @Override @@ -121,4 +197,34 @@ void validate() throws SqlDomException throw new SqlDomException("table is mandatory for Copy Table Command"); } } + + public void setFilePatterns(List filePatterns) + { + this.filePatterns = filePatterns; + } + + public void setFilePaths(List filePaths) + { + this.filePaths = filePaths; + } + + public void setUserDefinedFileFormatName(String userDefinedFileFormatName) + { + this.userDefinedFileFormatName = userDefinedFileFormatName; + } + + public void setFileFormatType(FileFormatType fileFormatType) + { + this.fileFormatType = fileFormatType; + } + + public void setFileFormatOptions(Map fileFormatOptions) + { + this.fileFormatOptions = fileFormatOptions; + } + + public void setCopyOptions(Map copyOptions) + { + this.copyOptions = copyOptions; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java index 7f34cf5a513..4653d29fcf7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.statements; -import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.common.ExternalVolume; +import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.common.IcebergProperties; import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.tabletypes.IcebergTableType; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; @@ -38,15 +38,12 @@ public class CreateTable implements DDLStatement { private Table table; private final List modifiers; // dataset - - private ExternalVolume externalVolume; private final List columns; // schema private final List tableConstraints; // table level private final List types; // dataset private final List clusterKeys; private Map tags; - - private static final String ICEBERG_CATALOG_INTEGRATION_SUFFIX = "ICEBERG_TABLE_2022 = true"; + private IcebergProperties icebergProperties; public CreateTable() { @@ -80,14 +77,6 @@ public void genSql(StringBuilder builder) throws SqlDomException builder.append(WHITE_SPACE); table.genSqlWithoutAlias(builder); - // External Volume - if (externalVolume != null) - { - builder.append(WHITE_SPACE); - externalVolume.genSql(builder); - builder.append(WHITE_SPACE); - } - // Columns + table constraints builder.append(OPEN_PARENTHESIS); SqlGen.genSqlList(builder, columns, EMPTY, COMMA); @@ -121,10 +110,11 @@ public void genSql(StringBuilder builder) throws SqlDomException builder.append(CLOSING_PARENTHESIS); } - // Iceberg unified Catalog suppoprt + // Iceberg 
Unified Catalog support if (types.stream().anyMatch(tableType -> tableType instanceof IcebergTableType)) { - builder.append(WHITE_SPACE + ICEBERG_CATALOG_INTEGRATION_SUFFIX); + builder.append(WHITE_SPACE); + icebergProperties.genSql(builder); } } @@ -156,9 +146,9 @@ else if (node instanceof ClusteringKeyConstraint) { clusterKeys.add((ClusteringKeyConstraint) node); } - else if (node instanceof ExternalVolume) + else if (node instanceof IcebergProperties) { - externalVolume = (ExternalVolume) node; + icebergProperties = (IcebergProperties) node; } else if (node instanceof Map) { @@ -176,9 +166,9 @@ void validate() throws SqlDomException { throw new SqlDomException("Columns list is mandatory for Create Table Command"); } - if (types.stream().anyMatch(tableType -> tableType instanceof IcebergTableType) && externalVolume == null) + if (types.stream().anyMatch(tableType -> tableType instanceof IcebergTableType) && icebergProperties == null) { - throw new SqlDomException("External Volume is mandatory for Iceberg Table"); + throw new SqlDomException("Iceberg properties are mandatory for Iceberg Table"); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/ShowCommand.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/ShowCommand.java index 1a61470b5dd..95074201d73 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/ShowCommand.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/ShowCommand.java @@ -20,41 +20,46 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.common.ShowType; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; +import java.util.Optional; + import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; public class ShowCommand implements SqlGen { private final ShowType operation; - private String databaseName; - private String schemaName; + private Optional databaseName; + private Optional schemaName; private String tableName; + private final String quoteIdentifier; + - public ShowCommand(ShowType operation, String databaseName, String schemaName, String tableName) + public ShowCommand(ShowType operation, Optional databaseName, Optional schemaName, String tableName, String quoteIdentifier) { this.operation = operation; this.databaseName = databaseName; this.schemaName = schemaName; this.tableName = tableName; + this.quoteIdentifier = quoteIdentifier; } - public String getDatabaseName() + public Optional getDatabaseName() { return databaseName; } public void setDatabaseName(String databaseName) { - this.databaseName = databaseName; + this.databaseName = Optional.of(databaseName); } - public String getSchemaName() + public Optional getSchemaName() { return schemaName; } public void setSchemaName(String schemaName) { - this.schemaName = schemaName; + this.schemaName = Optional.of(schemaName); } public String getTableName() @@ -71,7 +76,7 @@ public void setTableName(String tableName) SHOW { SCHEMAS | - TABLES [LIKE 'tableName'] [ IN schemaName ] | + TABLES [LIKE 'pattern'] [ IN schemaName ] | } */ @@ -86,17 +91,17 @@ public void genSql(StringBuilder builder) throws SqlDomException 
builder.append(WHITE_SPACE + Clause.LIKE.get()); builder.append(WHITE_SPACE); builder.append(SqlGenUtils.singleQuote(tableName)); - if (databaseName != null && schemaName != null) + if (databaseName.isPresent() && !databaseName.get().isEmpty() && schemaName.isPresent() && !schemaName.get().isEmpty()) { builder.append(WHITE_SPACE); builder.append(Clause.IN.get() + WHITE_SPACE); - builder.append(databaseName + SqlGenUtils.DOT + schemaName); + builder.append(SqlGenUtils.getQuotedField(databaseName.get(), quoteIdentifier) + SqlGenUtils.DOT + SqlGenUtils.getQuotedField(schemaName.get(), quoteIdentifier)); } - else if (schemaName != null) + else if (schemaName.isPresent() && !schemaName.get().isEmpty()) { builder.append(WHITE_SPACE); builder.append(Clause.IN.get() + WHITE_SPACE); - builder.append(schemaName); + builder.append(SqlGenUtils.getQuotedField(schemaName.get(), quoteIdentifier)); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java index 1cb69388daf..86413fc9f94 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/SnowflakeTestArtifacts.java @@ -37,4 +37,18 @@ public class SnowflakeTestArtifacts "\"BATCH_STATUS\" VARCHAR(32)," + "\"TABLE_BATCH_ID\" INTEGER," + "\"STAGING_FILTERS\" VARIANT)"; + + 
public static String expectedBaseTempStagingTableWithCountAndDataSplit = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging_legend_persistence_temp_staging\"" + + "(\"id\" INTEGER NOT NULL," + + "\"name\" VARCHAR NOT NULL," + + "\"amount\" DOUBLE," + + "\"biz_date\" DATE," + + "\"legend_persistence_count\" INTEGER," + + "\"data_split\" INTEGER NOT NULL)"; + + public static String expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates = "INSERT INTO \"mydb\".\"staging_legend_persistence_temp_staging\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"legend_persistence_count\", \"data_split\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"legend_persistence_count\" as \"legend_persistence_count\",DENSE_RANK() OVER (PARTITION BY stage.\"id\",stage.\"name\" ORDER BY stage.\"biz_date\" ASC) as \"data_split\" " + + "FROM (SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",COUNT(*) as \"legend_persistence_count\" FROM \"mydb\".\"staging\" as stage " + + "GROUP BY stage.\"id\", stage.\"name\", stage.\"amount\", stage.\"biz_date\") as stage)"; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java index a55f29a2994..64b59eecc4e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/AppendOnlyTest.java @@ -14,11 +14,114 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.AnsiTestArtifacts; +import org.finos.legend.engine.persistence.components.SnowflakeTestArtifacts; +import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.relational.RelationalSink; +import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; +import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; +import org.finos.legend.engine.persistence.components.scenarios.AppendOnlyScenarios; +import org.finos.legend.engine.persistence.components.scenarios.TestScenario; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; public class AppendOnlyTest extends org.finos.legend.engine.persistence.components.ingestmode.nontemporal.AppendOnlyTest { + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; + String rowsUpdated = "SELECT 0 as \"rowsUpdated\""; + String rowsTerminated = "SELECT 0 as \"rowsTerminated\""; + String rowsDeleted = "SELECT 0 as \"rowsDeleted\""; + + @Override + @Test + public void testAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration() + { + TestScenario scenario = new AppendOnlyScenarios().NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION(); + RelationalGenerator generator = RelationalGenerator.builder() + 
.ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(operations); + } + + @Override + public void verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + + String insertSql = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('id',stage.\"id\",'name',stage.\"name\",'amount',stage.\"amount\",'biz_date',stage.\"biz_date\")) " + + "FROM \"mydb\".\"staging\" as stage)"; + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQueryWithNoPKs, preActionsSqlList.get(0)); + Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + + // Stats + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertNull(operations.postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); + } + + @Override + @Test + public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration() + { + TestScenario scenario = new 
AppendOnlyScenarios().WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); + verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(operations, dataSplitRangesOneToTwo); + } + + @Override + public void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(List generatorResults, List dataSplitRanges) + { + String insertSql = "INSERT INTO \"mydb\".\"main\" " + + "(\"id\", \"name\", \"amount\", \"biz_date\", \"digest\", \"batch_update_time\") " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\"," + + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('id',stage.\"id\",'name',stage.\"name\",'amount',stage.\"amount\",'biz_date',stage.\"biz_date\"))," + + "'2000-01-01 00:00:00.000000' " + + "FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}'))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestPlusUpdateTimestampCreateQuery, generatorResults.get(0).preActionsSql().get(0)); + Assertions.assertEquals(SnowflakeTestArtifacts.expectedBaseTempStagingTableWithCountAndDataSplit, generatorResults.get(0).preActionsSql().get(1)); + + Assertions.assertEquals(AnsiTestArtifacts.expectedTempStagingCleanupQuery, generatorResults.get(0).deduplicationAndVersioningSql().get(0)); + Assertions.assertEquals(SnowflakeTestArtifacts.expectedInsertIntoBaseTempStagingWithAllVersionAndFilterDuplicates, 
generatorResults.get(0).deduplicationAndVersioningSql().get(1)); + + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(0)), generatorResults.get(0).ingestSql().get(0)); + Assertions.assertEquals(enrichSqlWithDataSplits(insertSql, dataSplitRanges.get(1)), generatorResults.get(1).ingestSql().get(0)); + Assertions.assertEquals(2, generatorResults.size()); + + // Stats + String incomingRecordCount = "SELECT COALESCE(SUM(stage.\"legend_persistence_count\"),0) as \"incomingRecordCount\" FROM \"mydb\".\"staging_legend_persistence_temp_staging\" as stage " + + "WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; + String rowsInserted = "SELECT COUNT(*) as \"rowsInserted\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_update_time\" = (SELECT MAX(sink.\"batch_update_time\") FROM \"mydb\".\"main\" as sink)"; + + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(enrichSqlWithDataSplits(incomingRecordCount, dataSplitRanges.get(1)), generatorResults.get(1).postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsUpdated, generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_UPDATED)); + Assertions.assertEquals(rowsDeleted, generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + Assertions.assertEquals(enrichSqlWithDataSplits(rowsInserted, dataSplitRanges.get(0)), generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_INSERTED)); + Assertions.assertEquals(rowsTerminated, generatorResults.get(0).postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + } @Override public RelationalSink getRelationalSink() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 70529e54d7e..54d26d645c9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; @@ -32,6 +33,8 @@ import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.StandardFileFormat; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.UserDefinedFileFormat; import org.junit.jupiter.api.Assertions; import 
org.junit.jupiter.api.Test; @@ -86,8 +89,11 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() .stagedFilesDatasetProperties( SnowflakeStagedFilesDatasetProperties.builder() .location("my_location") - .fileFormat("my_file_format") - .addAllFiles(filesList).build()) + .fileFormat(StandardFileFormat.builder() + .formatType(FileFormatType.CSV) + .putFormatOptions("FIELD_DELIMITER", ",") + .build()) + .addAllFilePatterns(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) .build(); @@ -101,7 +107,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue("task123") + .bulkLoadEventIdValue("task123") .batchIdPattern("{NEXT_BATCH_ID}") .build(); @@ -117,11 +123,13 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() "(\"col_int\", \"col_integer\", \"batch_id\", \"append_time\") " + "FROM " + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\",{NEXT_BATCH_ID},'2000-01-01 00:00:00.000000' " + - "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage)" + - " on_error = 'ABORT_STATEMENT'"; + "FROM my_location as legend_persistence_stage) " + + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + + "FILE_FORMAT = (FIELD_DELIMITER = ',', TYPE = 'CSV')" + + " ON_ERROR = 'ABORT_STATEMENT'"; String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + - "(SELECT {NEXT_BATCH_ID},'my_name','2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"task123\"}'))"; + "(SELECT 
{NEXT_BATCH_ID},'my_name','2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"event_id\":\"task123\",\"file_patterns\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -146,8 +154,8 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .stagedFilesDatasetProperties( SnowflakeStagedFilesDatasetProperties.builder() .location("my_location") - .fileFormat("my_file_format") - .addAllFiles(filesList).build()) + .fileFormat(StandardFileFormat.builder().formatType(FileFormatType.CSV).build()) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col3, col4)).build()) .alias("t") .build(); @@ -161,7 +169,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .ingestMode(bulkLoad) .relationalSink(SnowflakeSink.get()) .collectStatistics(true) - .bulkLoadTaskIdValue("task123") + .bulkLoadEventIdValue("task123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -175,8 +183,10 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() "(\"col_bigint\", \"col_variant\", \"batch_id\") " + "FROM " + "(SELECT t.$4 as \"col_bigint\",TO_VARIANT(PARSE_JSON(t.$5)) as \"col_variant\",(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME') " + - "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as t) " + - "on_error = 'ABORT_STATEMENT'"; + "FROM my_location as t) " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + + "FILE_FORMAT = (TYPE = 'CSV') " + + "ON_ERROR = 'ABORT_STATEMENT'"; Assertions.assertEquals(expectedCreateTableSql, 
preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -199,8 +209,8 @@ public void testBulkLoadWithUpperCaseConversionAndNoTaskId() .stagedFilesDatasetProperties( SnowflakeStagedFilesDatasetProperties.builder() .location("my_location") - .fileFormat("my_file_format") - .addAllFiles(filesList).build()) + .fileFormat(UserDefinedFileFormat.of("my_file_format")) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) .build(); @@ -232,13 +242,14 @@ public void testBulkLoadWithUpperCaseConversionAndNoTaskId() "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('COL_INT',legend_persistence_stage.$1,'COL_INTEGER',legend_persistence_stage.$2))," + "(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME'),'2000-01-01 00:00:00.000000' " + - "FROM my_location (FILE_FORMAT => 'my_file_format', " + - "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + - "on_error = 'ABORT_STATEMENT'"; + "FROM my_location as legend_persistence_stage) " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + + "FILE_FORMAT = (FORMAT_NAME = 'my_file_format') " + + "ON_ERROR = 'ABORT_STATEMENT'"; String expectedMetadataIngestSql = "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + "(SELECT (SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME')," + - "'MY_NAME','2000-01-01 
00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + "'MY_NAME','2000-01-01 00:00:00.000000',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"file_paths\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -312,7 +323,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue("batch123") + .bulkLoadEventIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); @@ -337,8 +348,8 @@ public void testBulkLoadWithDigest() .stagedFilesDatasetProperties( SnowflakeStagedFilesDatasetProperties.builder() .location("my_location") - .fileFormat("my_file_format") - .addAllFiles(filesList).build()) + .fileFormat(UserDefinedFileFormat.of("my_file_format")) + .addAllFilePaths(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) .build(); @@ -352,7 +363,7 @@ public void testBulkLoadWithDigest() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadTaskIdValue("task123") + .bulkLoadEventIdValue("task123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -369,9 +380,133 @@ public void testBulkLoadWithDigest() "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\"," + "LAKEHOUSE_UDF(OBJECT_CONSTRUCT('col_int',legend_persistence_stage.$1,'col_integer',legend_persistence_stage.$2))," + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata 
WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME'),'2000-01-01 00:00:00.000000' " + - "FROM my_location (FILE_FORMAT => 'my_file_format', " + - "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + - "on_error = 'ABORT_STATEMENT'"; + "FROM my_location as legend_persistence_stage) " + + "FILES = ('/path/xyz/file1.csv', '/path/xyz/file2.csv') " + + "FILE_FORMAT = (FORMAT_NAME = 'my_file_format') " + + "ON_ERROR = 'ABORT_STATEMENT'"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + + Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as \"rowsUpdated\"", statsSql.get(ROWS_UPDATED)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); + } + + @Test + public void testBulkLoadWithDigestAndForceOption() + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField("digest").digestUdfName("LAKEHOUSE_UDF").build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + SnowflakeStagedFilesDatasetProperties.builder() + .location("my_location") + .fileFormat(UserDefinedFileFormat.of("my_file_format")) + .putCopyOptions("FORCE", true) + .addAllFilePatterns(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + 
.build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(SnowflakeSink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadEventIdValue("task123") + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER,\"col_integer\" INTEGER,\"digest\" VARCHAR,\"batch_id\" INTEGER,\"append_time\" DATETIME)"; + + String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + + "(\"col_int\", \"col_integer\", \"digest\", \"batch_id\", \"append_time\") " + + "FROM " + + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\"," + + "LAKEHOUSE_UDF(OBJECT_CONSTRUCT('col_int',legend_persistence_stage.$1,'col_integer',legend_persistence_stage.$2))," + + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME'),'2000-01-01 00:00:00.000000' " + + "FROM my_location as legend_persistence_stage) " + + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + + "FILE_FORMAT = (FORMAT_NAME = 'my_file_format') " + + "FORCE = true, ON_ERROR = 'ABORT_STATEMENT'"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + + Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as \"rowsUpdated\"", statsSql.get(ROWS_UPDATED)); + Assertions.assertEquals("SELECT COUNT(*) 
as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00.000000'", statsSql.get(ROWS_INSERTED)); + } + + @Test + public void testBulkLoadWithDigestAndForceOptionAndOnErrorOption() + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField("digest").digestUdfName("LAKEHOUSE_UDF").build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + SnowflakeStagedFilesDatasetProperties.builder() + .location("my_location") + .fileFormat(StandardFileFormat.builder() + .formatType(FileFormatType.CSV) + .putFormatOptions("FIELD_DELIMITER", ",") + .build()) + .putCopyOptions("ON_ERROR", "SKIP_FILE") + .addAllFilePatterns(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(SnowflakeSink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadEventIdValue("task123") + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER,\"col_integer\" INTEGER,\"digest\" VARCHAR,\"batch_id\" INTEGER,\"append_time\" DATETIME)"; + + String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + + "(\"col_int\", \"col_integer\", \"digest\", 
\"batch_id\", \"append_time\") " + + "FROM " + + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\"," + + "LAKEHOUSE_UDF(OBJECT_CONSTRUCT('col_int',legend_persistence_stage.$1,'col_integer',legend_persistence_stage.$2))," + + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME'),'2000-01-01 00:00:00.000000' " + + "FROM my_location as legend_persistence_stage) " + + "PATTERN = '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)' " + + "FILE_FORMAT = (FIELD_DELIMITER = ',', TYPE = 'CSV') " + + "ON_ERROR = 'SKIP_FILE'"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java index 49ffe9bfc6e..149d2fc7619 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/NontemporalDeltaMergeTest.java @@ -265,6 +265,37 @@ public void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult Assertions.assertEquals(rowsDeleted, 
operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); } + @Override + public void verifyNontemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations) + { + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + + String mergeSql = "MERGE INTO \"mydb\".\"main\" as sink " + + "USING " + + "(SELECT stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))) as stage " + + "ON (sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\") " + + "WHEN MATCHED AND sink.\"digest\" <> stage.\"digest\" " + + "THEN UPDATE SET " + + "sink.\"id\" = stage.\"id\"," + + "sink.\"name\" = stage.\"name\"," + + "sink.\"amount\" = stage.\"amount\"," + + "sink.\"biz_date\" = stage.\"biz_date\"," + + "sink.\"digest\" = stage.\"digest\" " + + "WHEN NOT MATCHED THEN " + + "INSERT (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + + "VALUES (stage.\"id\",stage.\"name\",stage.\"amount\",stage.\"biz_date\",stage.\"digest\")"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); + Assertions.assertEquals(mergeSql, milestoningSqlList.get(0)); + + String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"biz_date\" > '2020-01-10') OR ((stage.\"biz_date\" > '2020-01-01') AND (stage.\"biz_date\" < '2020-01-05'))"; + // Stats + Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); + Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); + Assertions.assertEquals(rowsDeleted, operations.postIngestStatisticsSql().get(StatisticName.ROWS_DELETED)); + } + @Override public void 
verifyNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java index c8a96a8745b..b9d52ed474b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java @@ -17,12 +17,11 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetAdditionalProperties; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.IcebergProperties; import org.finos.legend.engine.persistence.components.logicalplan.datasets.TableOrigin; import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; import org.finos.legend.engine.persistence.components.relational.snowflake.optmizer.UpperCaseOptimizer; -import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.tabletypes.IcebergTableType; -import 
org.finos.legend.engine.persistence.components.relational.sqldom.tabletypes.TableType; import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; import org.finos.legend.engine.persistence.components.transformer.TransformOptions; import org.junit.jupiter.api.Assertions; @@ -141,7 +140,7 @@ public void testCreateTableWithClusteringKeyWithUpperCase() } @Test - public void testCreateIcebergTableExternalVolumeMissing() + public void testCreateIcebergTablePropertiesMissing() { DatasetDefinition dataset = DatasetDefinition.builder() .database("my_db") @@ -162,7 +161,7 @@ public void testCreateIcebergTableExternalVolumeMissing() } catch (Exception e) { - Assertions.assertTrue(e.getMessage().contains("External Volume is mandatory for Iceberg Table")); + Assertions.assertTrue(e.getMessage().contains("Iceberg properties are mandatory for Iceberg Table")); } } @@ -178,7 +177,7 @@ public void testCreateIcebergTable() .schema(schemaWithAllColumns) .datasetAdditionalProperties(DatasetAdditionalProperties.builder() .tableOrigin(TableOrigin.ICEBERG) - .externalVolume("my_ext_vol") + .icebergProperties(IcebergProperties.builder().externalVolume("my_ext_vol").catalog("SNOWFLAKE").baseLocation("my_location").build()) .build()) .build(); Operation create = Create.of(true, dataset); @@ -187,13 +186,12 @@ public void testCreateIcebergTable() SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); String expected = "CREATE ICEBERG TABLE IF NOT EXISTS \"my_db\".\"my_schema\".\"my_table\"" + - " with EXTERNAL_VOLUME = 'my_ext_vol' " + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER NOT NULL UNIQUE,\"col_bigint\" BIGINT," + "\"col_tinyint\" TINYINT,\"col_smallint\" SMALLINT,\"col_char\" CHAR,\"col_varchar\" VARCHAR," + "\"col_string\" VARCHAR,\"col_timestamp\" TIMESTAMP,\"col_datetime\" DATETIME,\"col_date\" DATE," + "\"col_real\" DOUBLE,\"col_float\" DOUBLE,\"col_decimal\" 
NUMBER(10,4),\"col_double\" DOUBLE," + "\"col_binary\" BINARY,\"col_time\" TIME,\"col_numeric\" NUMBER(38,0),\"col_boolean\" BOOLEAN," + - "\"col_varbinary\" BINARY(10)) ICEBERG_TABLE_2022 = true"; + "\"col_varbinary\" BINARY(10)) CATALOG='SNOWFLAKE', EXTERNAL_VOLUME='my_ext_vol', BASE_LOCATION='my_location'"; Assertions.assertEquals(expected, list.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java index 420a8b2226f..ddf24c67f61 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/ShowTest.java @@ -40,7 +40,7 @@ public void testShowCommand() LogicalPlan logicalPlan = LogicalPlanFactory.getLogicalPlanForDoesDatasetExist(dataset); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW TABLES LIKE 'trips' IN CITIBIKE.public"; + String expected = "SHOW TABLES LIKE 'trips' IN \"CITIBIKE\".\"public\""; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } @@ -57,7 +57,7 @@ public void testShowCommandWithUpperCase() LogicalPlan logicalPlan = LogicalPlanFactory.getLogicalPlanForDoesDatasetExist(dataset); SqlPlan physicalPlan = 
transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW TABLES LIKE 'TRIPS' IN CITIBIKE.PUBLIC"; + String expected = "SHOW TABLES LIKE 'TRIPS' IN \"CITIBIKE\".\"PUBLIC\""; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } @@ -83,7 +83,7 @@ public void testShowCommandWithSchemaWithoutDb() LogicalPlan logicalPlan = LogicalPlanFactory.getLogicalPlanForDoesDatasetExist(dataset); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expected = "SHOW TABLES LIKE 'trips' IN public"; + String expected = "SHOW TABLES LIKE 'trips' IN \"public\""; assertEquals(1, list.size()); assertEquals(expected, list.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java index b9435bc80af..78877a8d46e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/CopyStatementTest.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops; +import 
org.finos.legend.engine.persistence.components.common.FileFormatType; import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.expressions.table.StagedFilesTable; import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.statements.CopyStatement; import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.StagedFilesField; @@ -26,7 +27,9 @@ import org.junit.jupiter.api.Test; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -35,7 +38,7 @@ public class CopyStatementTest public static String QUOTE_IDENTIFIER = "\"%s\""; @Test - void testSelectStageStatement() throws SqlDomException + void testCopyStatementWithFilesAndStandardFileFormat() throws SqlDomException { StagedFilesTable stagedFiles = new StagedFilesTable("t","@my_stage"); List selectItems = Arrays.asList( @@ -55,20 +58,29 @@ void testSelectStageStatement() throws SqlDomException ); CopyStatement copyStatement = new CopyStatement(table, columns, selectStatement); + copyStatement.setFilePaths(Arrays.asList("path1", "path2")); + copyStatement.setFileFormatType(FileFormatType.CSV); + Map fileFormatOptions = new HashMap<>(); + fileFormatOptions.put("COMPRESSION", "AUTO"); + copyStatement.setFileFormatOptions(fileFormatOptions); + Map copyOptions = new HashMap<>(); + copyOptions.put("ON_ERROR", "ABORT_STATEMENT"); + copyStatement.setCopyOptions(copyOptions); + String sql1 = genSqlIgnoringErrors(copyStatement); assertEquals("COPY INTO \"mydb\".\"mytable1\" " + "(\"field1\", \"field2\", \"field3\", \"field4\") " + "FROM " + "(SELECT t.$1 as \"field1\",t.$2 as \"field2\",t.$3 as \"field3\",t.$4 as \"field4\" FROM @my_stage as t) " + - "on_error = 'ABORT_STATEMENT'", sql1); + "FILES = ('path1', 'path2') " + + "FILE_FORMAT = (COMPRESSION = 'AUTO', TYPE = 'CSV') " + + "ON_ERROR = 'ABORT_STATEMENT'", 
sql1); } @Test - void testSelectStageStatementWithPatternAndFileFormat() throws SqlDomException + void testCopyStatementWithPatternAndFileFormatAndForceOption() throws SqlDomException { StagedFilesTable stagedFiles = new StagedFilesTable("t","@my_stage"); - stagedFiles.setFileFormat("my_file_format"); - stagedFiles.setFilePattern("my_pattern"); List selectItems = Arrays.asList( new StagedFilesField(QUOTE_IDENTIFIER, 1, "t", "field1", "field1"), @@ -87,14 +99,24 @@ void testSelectStageStatementWithPatternAndFileFormat() throws SqlDomException new Field("field4", QUOTE_IDENTIFIER) ); + Map copyOptions = new HashMap<>(); + copyOptions.put("FORCE", true); + copyOptions.put("ON_ERROR", "ABORT_STATEMENT"); CopyStatement copyStatement = new CopyStatement(table, columns, selectStatement); + copyStatement.setFilePatterns(Arrays.asList("my_pattern1", "my_pattern2")); + copyStatement.setUserDefinedFileFormatName("my_file_format"); + copyStatement.setCopyOptions(copyOptions); + String sql1 = genSqlIgnoringErrors(copyStatement); - assertEquals("COPY INTO \"mydb\".\"mytable1\" " + + String expectedStr = "COPY INTO \"mydb\".\"mytable1\" " + "(\"field1\", \"field2\", \"field3\", \"field4\") " + "FROM " + "(SELECT t.$1:field1 as \"field1\",t.$1:field2 as \"field2\",t.$1:field3 as \"field3\",t.$1:field4 as \"field4\" " + - "FROM @my_stage (FILE_FORMAT => 'my_file_format', PATTERN => 'my_pattern') as t) " + - "on_error = 'ABORT_STATEMENT'", sql1); + "FROM @my_stage as t) " + + "PATTERN = '(my_pattern1)|(my_pattern2)' " + + "FILE_FORMAT = (FORMAT_NAME = 'my_file_format') " + + "FORCE = true, ON_ERROR = 'ABORT_STATEMENT'"; + assertEquals(expectedStr, sql1); } public static String genSqlIgnoringErrors(SqlGen item) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index adf0d700745..fab7988915f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -18,13 +18,21 @@ import org.finos.legend.engine.persistence.components.common.FilterType; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.DeriveMainDatasetSchemaFromStaging; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.LessThan; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Or; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FilteredDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import 
org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.NumericalValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.relational.api.DataSplitRange; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.scenarios.TestScenario; @@ -74,6 +82,7 @@ public class BaseTest protected String stagingTableWithoutDuplicatesAlias = "stage"; protected String digestField = "digest"; + protected String digestUdf = "LAKEHOUSE_MD5"; protected String versionField = "version"; protected String bizDateField = "biz_date"; protected String snapshotIdField = "snapshot_id"; @@ -260,6 +269,13 @@ public class BaseTest .addFields(digest) .build(); + protected SchemaDefinition baseTableSchemaWithNoPrimaryKeysAndNoDigest = SchemaDefinition.builder() + .addFields(idNonPrimary) + .addFields(nameNonPrimary) + .addFields(amount) + .addFields(bizDate) + .build(); + protected SchemaDefinition baseTableSchemaWithAuditAndNoPrimaryKeys = SchemaDefinition.builder() .addFields(idNonPrimary) .addFields(nameNonPrimary) @@ -562,6 +578,11 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan .schema(baseTableSchemaWithNoPrimaryKeys) .build(); + protected Dataset stagingTableWithNoPrimaryKeysAndNoDigest = DatasetDefinition.builder() + .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) + .schema(baseTableSchemaWithNoPrimaryKeysAndNoDigest) + .build(); + protected Dataset mainTableWithNoPrimaryKeysHavingAuditField = DatasetDefinition.builder() .database(mainDbName).name(mainTableName).alias(mainTableAlias) .schema(baseTableSchemaWithAuditAndNoPrimaryKeys) @@ -612,6 +633,36 @@ protected String enrichSqlWithDataSplits(String sql, DataSplitRange dataSplitRan 
.addDatasetFilters(DatasetFilter.of("biz_date", FilterType.LESS_THAN, "2020-01-03")) .build(); + protected Dataset filteredStagingTable = FilteredDataset.builder() + .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) + .schema(baseTableSchemaWithDigest) + .filter(GreaterThan.of(FieldValue.builder() + .fieldName(batchIdInField) + .datasetRefAlias(stagingTableAlias) + .build(), NumericalValue.of(5L))) + .build(); + + protected Dataset filteredStagingTableWithVersion = FilteredDataset.builder() + .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) + .schema(baseTableSchemaWithDigestAndVersion) + .filter(GreaterThan.of(FieldValue.builder() + .fieldName(batchIdInField) + .datasetRefAlias(stagingTableAlias) + .build(), NumericalValue.of(5L))) + .build(); + + protected Dataset filteredStagingTableWithComplexFilter = FilteredDataset.builder() + .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) + .schema(baseTableSchemaWithDigest) + .filter(Or.builder() + .addConditions(GreaterThan.of(FieldValue.builder().fieldName(bizDateField).datasetRefAlias(stagingTableAlias).build(), StringValue.of("2020-01-10"))) + .addConditions(And.builder() + .addConditions(GreaterThan.of(FieldValue.builder().fieldName(bizDateField).datasetRefAlias(stagingTableAlias).build(), StringValue.of("2020-01-01"))) + .addConditions(LessThan.of(FieldValue.builder().fieldName(bizDateField).datasetRefAlias(stagingTableAlias).build(), StringValue.of("2020-01-05"))) + .build()) + .build()) + .build(); + protected Dataset stagingTableWithBaseSchemaAndDigestAndDeleteIndicator = DatasetDefinition.builder() .database(stagingDbName).name(stagingTableName).alias(stagingTableAlias) .schema(stagingTableSchemaWithDeleteIndicator) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java index 3c71d88d913..49727a03797 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/AppendOnlyScenarios.java @@ -21,6 +21,8 @@ import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.AllVersionsStrategy; import org.finos.legend.engine.persistence.components.ingestmode.versioning.DigestBasedResolver; import org.finos.legend.engine.persistence.components.ingestmode.versioning.MaxVersionStrategy; @@ -30,7 +32,7 @@ public class AppendOnlyScenarios extends BaseTest { /* - Test Scenarios for Non-temporal Delta + Test Scenarios for Append Only Variables: 1) Auditing: No Auditing, With Auditing 2) Versioning: NoVersion, MaxVersion, AllVersion @@ -91,7 +93,7 @@ 
public class AppendOnlyScenarios extends BaseTest public TestScenario NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(NoAuditing.builder().build()) @@ -110,7 +112,7 @@ public TestScenario NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_REC public TestScenario WITH_AUDITING__FILTER_DUPS__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) @@ -122,7 +124,7 @@ public TestScenario WITH_AUDITING__FILTER_DUPS__NO_VERSIONING__WITH_FILTER_EXIST public TestScenario WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(FailOnDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder() .versioningField(bizDateField) @@ -140,7 +142,7 @@ public TestScenario WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING public TestScenario NO_AUDITING__FILTER_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) 
.versioningStrategy(AllVersionsStrategy.builder() .versioningField(bizDateField) @@ -157,7 +159,7 @@ public TestScenario NO_AUDITING__FILTER_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RE public TestScenario WITH_AUDITING__FILTER_DUPS__ALL_VERSION__WITH_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(AllVersionsStrategy.builder() .versioningField(bizDateField) @@ -174,7 +176,7 @@ public TestScenario WITH_AUDITING__FILTER_DUPS__ALL_VERSION__WITH_FILTER_EXISTIN public TestScenario WITH_AUDITING__FAIL_ON_DUPS__MAX_VERSION__WITH_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(FailOnDuplicates.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(bizDateField) @@ -190,7 +192,7 @@ public TestScenario WITH_AUDITING__FAIL_ON_DUPS__MAX_VERSION__WITH_FILTER_EXISTI public TestScenario WITH_AUDITING__FILTER_DUPS__MAX_VERSION__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .versioningStrategy(MaxVersionStrategy.builder() .versioningField(bizDateField) @@ -207,7 +209,7 @@ public TestScenario WITH_AUDITING__FILTER_DUPS__MAX_VERSION__NO_FILTER_EXISTING_ public TestScenario NO_AUDITING__NO_DEDUP__NO_VERSIONING__WITH_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) 
.deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(NoAuditing.builder().build()) @@ -219,7 +221,7 @@ public TestScenario NO_AUDITING__NO_DEDUP__NO_VERSIONING__WITH_FILTER_EXISTING_R public TestScenario WITH_AUDITING__ALLOW_DUPLICATES__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS() { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(AllowDuplicates.builder().build()) .versioningStrategy(NoVersioningStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) @@ -227,4 +229,33 @@ public TestScenario WITH_AUDITING__ALLOW_DUPLICATES__NO_VERSIONING__NO_FILTER_EX .build(); return new TestScenario(mainTableWithNoPrimaryKeysHavingAuditField, stagingTableWithNoPrimaryKeys, ingestMode); } + + public TestScenario NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION() + { + AppendOnly ingestMode = AppendOnly.builder() + .deduplicationStrategy(AllowDuplicates.builder().build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .filterExistingRecords(false) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(digestUdf).digestField(digestField).build()) + .build(); + return new TestScenario(mainTableWithNoPrimaryKeys, stagingTableWithNoPrimaryKeysAndNoDigest, ingestMode); + } + + public TestScenario WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION() + { + AppendOnly ingestMode = AppendOnly.builder() + .deduplicationStrategy(FailOnDuplicates.builder().build()) + .versioningStrategy(AllVersionsStrategy.builder() + .versioningField(bizDateField) + .dataSplitFieldName(dataSplitField) + .mergeDataVersionResolver(DigestBasedResolver.INSTANCE) + .performStageVersioning(true) + 
.build()) + .auditing(DateTimeAuditing.builder().dateTimeField(batchUpdateTimeField).build()) + .filterExistingRecords(false) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(digestUdf).digestField(digestField).build()) + .build(); + return new TestScenario(mainTableWithBaseSchemaHavingDigestAndAuditField, stagingTableWithBaseSchema, ingestMode); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java index 5fb0be6ba7a..9b956004abd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/NonTemporalDeltaScenarios.java @@ -117,6 +117,15 @@ public TestScenario NO_VERSIONING__WITH_STAGING_FILTER() return new TestScenario(mainTableWithBaseSchemaAndDigest, stagingTableWithFilters, ingestMode); } + public TestScenario NO_VERSIONING__WITH_FILTERED_DATASET() + { + NontemporalDelta ingestMode = NontemporalDelta.builder() + .digestField(digestField) + .auditing(NoAuditing.builder().build()) + .build(); + return new TestScenario(mainTableWithBaseSchemaAndDigest, filteredStagingTableWithComplexFilter, ingestMode); + } + public TestScenario FILTER_DUPS__MAX_VERSION__WITH_STAGING_FILTER() { NontemporalDelta ingestMode = NontemporalDelta.builder() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java index 49b3f90aa94..d8cb52f53e6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalDeltaBatchIdBasedScenarios.java @@ -171,6 +171,19 @@ public TestScenario BATCH_ID_BASED__NO_VERSIONING__WITH_STAGING_FILTER() return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithFilter, ingestMode); } + public TestScenario BATCH_ID_BASED__NO_VERSIONING__WITH_FILTERED_DATASET() + { + UnitemporalDelta ingestMode = UnitemporalDelta.builder() + .digestField(digestField) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .build()) + .versioningStrategy(NoVersioningStrategy.builder().build()) + .build(); + return new TestScenario(mainTableWithBatchIdBasedSchema, filteredStagingTable, ingestMode); + } + public TestScenario BATCH_ID_BASED__FILTER_DUPS__MAX_VERSION__WITH_STAGING_FILTER() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() @@ -185,6 +198,20 @@ public TestScenario BATCH_ID_BASED__FILTER_DUPS__MAX_VERSION__WITH_STAGING_FILTE return new TestScenario(mainTableWithBatchIdAndVersionBasedSchema, 
stagingTableWithFilterAndVersion, ingestMode); } + public TestScenario BATCH_ID_BASED__FILTER_DUPS__MAX_VERSION__WITH_FILTERED_DATASET() + { + UnitemporalDelta ingestMode = UnitemporalDelta.builder() + .digestField(digestField) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .build()) + .versioningStrategy(MaxVersionStrategy.builder().performStageVersioning(true).versioningField(version.name()).mergeDataVersionResolver(VersionColumnBasedResolver.of(VersionComparator.GREATER_THAN)).build()) + .deduplicationStrategy(FilterDuplicates.builder().build()) + .build(); + return new TestScenario(mainTableWithBatchIdAndVersionBasedSchema, filteredStagingTableWithVersion, ingestMode); + } + public TestScenario BATCH_ID_BASED__NO_DEDUP__MAX_VERSION_WITHOUT_PERFORM__WITH_STAGING_FILTER() { UnitemporalDelta ingestMode = UnitemporalDelta.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java index a3cf0a5f3e3..26b25c7f317 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java @@ -19,6 +19,7 @@ import 
org.finos.legend.engine.persistence.components.ingestmode.AppendOnly; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -216,7 +217,7 @@ void testAppendOnlyValidationDateTimeFieldMissing() try { AppendOnly ingestMode = AppendOnly.builder() - .digestField(digestField) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(digestField).build()) .deduplicationStrategy(FilterDuplicates.builder().build()) .auditing(DateTimeAuditing.builder().build()) .build(); @@ -247,5 +248,39 @@ void testAppendOnlyNoAuditingFilterExistingRecords() } } + @Test + public void testAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration() + { + TestScenario scenario = scenarios.NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(operations); + } + + public abstract void verifyAppendOnlyNoAuditingAllowDuplicatesNoVersioningNoFilterExistingRecordsUdfDigestGeneration(GeneratorResult operations); + + @Test + public void testAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration() 
+ { + TestScenario scenario = scenarios.WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS__UDF_DIGEST_GENERATION(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + List operations = generator.generateOperationsWithDataSplits(scenario.getDatasets(), dataSplitRangesOneToTwo); + verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(operations, dataSplitRangesOneToTwo); + } + + public abstract void verifyAppendOnlyWithAuditingFailOnDuplicatesAllVersionNoFilterExistingRecordsUdfDigestGeneration(List generatorResults, List dataSplitRanges); + public abstract RelationalSink getRelationalSink(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java index 87ad65d7f20..520434e8786 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java @@ -234,6 +234,7 @@ void testNontemporalDeltaWithNoVersionAndStagingFilter() 
RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) + .cleanupStagingData(false) .collectStatistics(true) .build(); @@ -243,6 +244,23 @@ void testNontemporalDeltaWithNoVersionAndStagingFilter() public abstract void verifyNontemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult operations); + @Test + void testNontemporalDeltaWithNoVersionAndFilteredDataset() + { + TestScenario testScenario = scenarios.NO_VERSIONING__WITH_FILTERED_DATASET(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(testScenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .cleanupStagingData(false) + .collectStatistics(true) + .build(); + + GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); + verifyNontemporalDeltaWithNoVersionAndFilteredDataset(operations); + } + + public abstract void verifyNontemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations); + @Test void testNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters() { @@ -250,6 +268,7 @@ void testNontemporalDeltaWithFilterDupsMaxVersionWithStagingFilters() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) + .cleanupStagingData(false) .collectStatistics(true) .build(); @@ -266,6 +285,7 @@ void testNontemporalDeltaWithNoDedupMaxVersioningWithoutPerformWithStagingFilter RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(testScenario.getIngestMode()) .relationalSink(getRelationalSink()) + .cleanupStagingData(false) .collectStatistics(true) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java index 29aa39969cd..c6abb417906 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/derivation/AppendOnlyBasedDerivationTest.java @@ -20,6 +20,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.scenarios.AppendOnlyScenarios; import org.finos.legend.engine.persistence.components.scenarios.TestScenario; import org.junit.jupiter.api.Assertions; @@ -37,7 +38,8 @@ void testAppendOnlyAllowDuplicatesNoAuditingNoVersioningNoFilterExistingRecords( TestScenario scenario = scenarios.NO_AUDITING__NO_DEDUP__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS(); assertDerivedMainDataset(scenario); AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); + Assertions.assertTrue(mode.digestGenStrategy() instanceof UserProvidedDigestGenStrategy); + Assertions.assertEquals("DIGEST", 
((UserProvidedDigestGenStrategy) mode.digestGenStrategy()).digestField()); Assertions.assertTrue(mode.auditing() instanceof NoAuditing); Assertions.assertTrue(mode.deduplicationStrategy() instanceof AllowDuplicates); } @@ -49,7 +51,8 @@ void testAppendOnlyFailOnDuplicatesWithAuditingAllVersionNoFilterExistingRecords TestScenario scenario = scenarios.WITH_AUDITING__FAIL_ON_DUPS__ALL_VERSION__NO_FILTER_EXISTING_RECORDS(); assertDerivedMainDataset(scenario); AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); + Assertions.assertTrue(mode.digestGenStrategy() instanceof UserProvidedDigestGenStrategy); + Assertions.assertEquals("DIGEST", ((UserProvidedDigestGenStrategy) mode.digestGenStrategy()).digestField()); Assertions.assertEquals("DATA_SPLIT", mode.dataSplitField().get()); Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); @@ -64,7 +67,8 @@ void testAppendOnlyAllowDuplicatesWithAuditingNoVersioningNoFilterExistingRecord TestScenario scenario = scenarios.WITH_AUDITING__ALLOW_DUPLICATES__NO_VERSIONING__NO_FILTER_EXISTING_RECORDS(); assertDerivedMainDataset(scenario); AppendOnly mode = (AppendOnly) scenario.getIngestMode().accept(new IngestModeCaseConverter(String::toUpperCase)); - Assertions.assertEquals("DIGEST", mode.digestField().get()); + Assertions.assertTrue(mode.digestGenStrategy() instanceof UserProvidedDigestGenStrategy); + Assertions.assertEquals("DIGEST", ((UserProvidedDigestGenStrategy) mode.digestGenStrategy()).digestField()); Assertions.assertTrue(mode.auditing() instanceof DateTimeAuditing); DateTimeAuditing auditing = (DateTimeAuditing) mode.auditing(); Assertions.assertEquals("BATCH_UPDATE_TIME", auditing.dateTimeField()); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java index 63a34113aa7..e5b9e0ffdf9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java @@ -269,7 +269,7 @@ void testUnitemporalDeltaWithNoVersioningAndStagingFilters() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) - .cleanupStagingData(true) + .cleanupStagingData(false) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalDeltaWithNoVersionAndStagingFilter(operations); @@ -277,6 +277,22 @@ void testUnitemporalDeltaWithNoVersioningAndStagingFilters() public abstract void verifyUnitemporalDeltaWithNoVersionAndStagingFilter(GeneratorResult operations); + @Test + void testUnitemporalDeltaWithNoVersioningAndFilteredDataset() + { + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_VERSIONING__WITH_FILTERED_DATASET(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + 
.relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .cleanupStagingData(false) + .build(); + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + verifyUnitemporalDeltaWithNoVersionAndFilteredDataset(operations); + } + + public abstract void verifyUnitemporalDeltaWithNoVersionAndFilteredDataset(GeneratorResult operations); + @Test void testUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter() { @@ -286,7 +302,7 @@ void testUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) - .cleanupStagingData(true) + .cleanupStagingData(false) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); this.verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(operations); @@ -294,6 +310,23 @@ void testUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter() public abstract void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithStagingFilter(GeneratorResult operations); + @Test + void testUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset() + { + TestScenario scenario = scenarios.BATCH_ID_BASED__FILTER_DUPS__MAX_VERSION__WITH_FILTERED_DATASET(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .cleanupStagingData(false) + .build(); + GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); + this.verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(operations); + } + + public abstract void verifyUnitemporalDeltaWithFilterDupsMaxVersionWithFilteredDataset(GeneratorResult operations); + @Test void testUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters() { @@ -303,7 +336,7 @@ void 
testUnitemporalDeltaWithNoDedupMaxVersionWithoutPerformAndStagingFilters() .ingestMode(scenario.getIngestMode()) .relationalSink(getRelationalSink()) .executionTimestampClock(fixedClock_2000_01_01) - .cleanupStagingData(true) + .cleanupStagingData(false) .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java index 7e97be5066e..9ee7343a6ae 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/AppendOnlyMapper.java @@ -18,6 +18,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategy; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.dataset.DatasetType; import org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.persister.ingestmode.appendonly.AppendOnly; import org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.Nontemporal; @@ -31,7 +32,7 @@ public class AppendOnlyMapper public static org.finos.legend.engine.persistence.components.ingestmode.AppendOnly from(AppendOnly 
appendOnly) { return org.finos.legend.engine.persistence.components.ingestmode.AppendOnly.builder() - .digestField(DIGEST_FIELD_DEFAULT) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(DIGEST_FIELD_DEFAULT).build()) .filterExistingRecords(appendOnly.filterDuplicates) .auditing(appendOnly.auditing.accept(MappingVisitors.MAP_TO_COMPONENT_AUDITING)) .build(); @@ -52,7 +53,7 @@ public static org.finos.legend.engine.persistence.components.ingestmode.AppendOn } return org.finos.legend.engine.persistence.components.ingestmode.AppendOnly.builder() - .digestField(DIGEST_FIELD_DEFAULT) + .digestGenStrategy(UserProvidedDigestGenStrategy.builder().digestField(DIGEST_FIELD_DEFAULT).build()) .filterExistingRecords(filterExistingRecords) .auditing(temporality.auditing.accept(org.finos.legend.engine.testable.persistence.mapper.v2.MappingVisitors.MAP_TO_COMPONENT_NONTEMPORAL_AUDITING)) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java index aeb17585d55..39c58854d83 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/DeriveDatasets.java @@ -78,10 +78,7 @@ public DeriveDatasets(Dataset mainDataset, String testData) public Datasets visit(AppendOnly appendOnly) { Dataset stagingDataset = stagingDatasetBuilder.schema(stagingSchemaDefinitionBuilder.build()).build(); - if (appendOnly.filterDuplicates) - { - enrichMainSchemaWithDigest(); - } + enrichMainSchemaWithDigest(); boolean baseSchemaHasPks = 
baseSchema.fields().stream().anyMatch(field -> field.primaryKey()); appendOnly.auditing.accept(new MappingVisitors.EnrichSchemaWithAuditing(mainSchemaDefinitionBuilder, baseSchema, baseSchemaHasPks)); Dataset enrichedMainDataset = mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.build()).build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/IngestModeVisitors.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/IngestModeVisitors.java index c02dbf1f05b..9bad42ae936 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/IngestModeVisitors.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v1/IngestModeVisitors.java @@ -50,10 +50,7 @@ public Set visit(AppendOnly appendOnly) { fieldsToIgnore.add(auditField.get()); } - if (appendOnly.filterDuplicates) - { - fieldsToIgnore.add(DIGEST_FIELD_DEFAULT); - } + fieldsToIgnore.add(DIGEST_FIELD_DEFAULT); return fieldsToIgnore; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v2/IngestModeMapper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v2/IngestModeMapper.java index 4a7dfbe3edc..1da328f1c97 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v2/IngestModeMapper.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/main/java/org/finos/legend/engine/testable/persistence/mapper/v2/IngestModeMapper.java @@ -193,11 +193,7 @@ public static Datasets deriveDatasets(ServiceOutputTarget serviceOutputTarget, D else { ignoreAuditField(fieldsToIgnore, nontemporal); - org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.AppendOnly appendOnlyHandling = (org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.AppendOnly) nontemporal.updatesHandling; - if (appendOnlyHandling.appendStrategy instanceof FilterDuplicates) - { - enrichMainSchemaWithDigest(baseSchema, mainSchemaDefinitionBuilder); - } + enrichMainSchemaWithDigest(baseSchema, mainSchemaDefinitionBuilder); Dataset stagingDataset = getStagingDataset(baseSchema, stagingDatasetBuilder, fieldsToIgnore, fieldsToAdd); Dataset enrichedMainDataset = mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.build()).build(); return Datasets.of(enrichedMainDataset, stagingDataset); @@ -288,22 +284,9 @@ private static Set extractFieldsToExclude(DatasetType datasetType, Tempo } else { - if (nontemporal.updatesHandling instanceof Overwrite) - { - ignoreAuditField(fieldsToIgnore, nontemporal); - fieldsToIgnore.add(DIGEST_FIELD_DEFAULT); - return fieldsToIgnore; - } - else - { - ignoreAuditField(fieldsToIgnore, nontemporal); - org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.AppendOnly appendOnlyHandling = (org.finos.legend.engine.protocol.pure.v1.model.packageableElement.persistence.relational.temporality.updatesHandling.AppendOnly) nontemporal.updatesHandling; - if (appendOnlyHandling.appendStrategy instanceof FilterDuplicates) - { - fieldsToIgnore.add(DIGEST_FIELD_DEFAULT); - } - return fieldsToIgnore; - } + ignoreAuditField(fieldsToIgnore, nontemporal); + 
fieldsToIgnore.add(DIGEST_FIELD_DEFAULT); + return fieldsToIgnore; } } case Unitemporal: diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithAllowDuplicates.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithAllowDuplicates.java index c21aa6882ff..23f7d5f7bae 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithAllowDuplicates.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/ingestmode/appendonly/TestAppendOnlyWithAllowDuplicates.java @@ -67,7 +67,7 @@ public void testAppendOnlyWithFailure() throws Exception AssertPass batch2Status = (AssertPass) testFailed.assertStatuses.get(1); // no space - Assert.assertEquals("[{\"ID\":1,\"NAME\":\"ANDY\"},{\"ID\":2,\"NAME\":\"BRAD\"}]", batch1Status.actual); + Assert.assertEquals("[{\"ID\":1,\"DIGEST\":\"fd0de4a73667047d16c76c8a6a61610f\",\"NAME\":\"ANDY\"},{\"ID\":2,\"DIGEST\":\"5c38f8982fc67a7f1cdf2e3c00a29339\",\"NAME\":\"BRAD\"}]", batch1Status.actual); // with space Assert.assertEquals("[{\"ID\":1, \"NAME\":\"CHLOE\"},{\"ID\":2, \"NAME\":\"BRAD\"}]", batch1Status.expected); Assert.assertTrue(batch1Status.message.contains("AssertionError: Results do not match the expected data")); @@ -129,7 +129,7 @@ public void testAppendOnlyWithFailureV2() throws Exception AssertPass batch2Status = (AssertPass) testFailed.assertStatuses.get(1); // no space - Assert.assertEquals("[{\"ID\":1,\"NAME\":\"ANDY\"},{\"ID\":2,\"NAME\":\"BRAD\"}]", batch1Status.actual); + 
Assert.assertEquals("[{\"ID\":1,\"DIGEST\":\"fd0de4a73667047d16c76c8a6a61610f\",\"NAME\":\"ANDY\"},{\"ID\":2,\"DIGEST\":\"5c38f8982fc67a7f1cdf2e3c00a29339\",\"NAME\":\"BRAD\"}]", batch1Status.actual); // with space Assert.assertEquals("[{\"ID\":1, \"NAME\":\"CHLOE\"},{\"ID\":2, \"NAME\":\"BRAD\"}]", batch1Status.expected); Assert.assertTrue(batch1Status.message.contains("AssertionError: Results do not match the expected data")); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java index 0d6245a757d..23034058ed4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/IngestModeMapperTest.java @@ -23,6 +23,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.AllowDuplicates; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicates; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UserProvidedDigestGenStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.DeleteIndicatorMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.NoDeletesMergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; @@ -82,7 +83,8 @@ public void testMapperForAppendOnly() throws Exception Assert.assertTrue(componentIngestMode instanceof AppendOnly); AppendOnly appendOnly = 
(AppendOnly) componentIngestMode; - Assert.assertEquals("DIGEST", appendOnly.digestField().get()); + Assert.assertTrue(appendOnly.digestGenStrategy() instanceof UserProvidedDigestGenStrategy); + Assert.assertEquals("DIGEST", ((UserProvidedDigestGenStrategy) appendOnly.digestGenStrategy()).digestField()); Assert.assertTrue(appendOnly.auditing() instanceof NoAuditing); Assert.assertFalse(appendOnly.filterExistingRecords()); @@ -93,7 +95,8 @@ public void testMapperForAppendOnly() throws Exception Assert.assertTrue(componentIngestMode instanceof AppendOnly); appendOnly = (AppendOnly) componentIngestMode; - Assert.assertEquals("DIGEST", appendOnly.digestField().get()); + Assert.assertTrue(appendOnly.digestGenStrategy() instanceof UserProvidedDigestGenStrategy); + Assert.assertEquals("DIGEST", ((UserProvidedDigestGenStrategy) appendOnly.digestGenStrategy()).digestField()); Assert.assertTrue(appendOnly.auditing() instanceof NoAuditing); Assert.assertTrue(appendOnly.filterExistingRecords()); @@ -104,7 +107,8 @@ public void testMapperForAppendOnly() throws Exception Assert.assertTrue(componentIngestMode instanceof AppendOnly); appendOnly = (AppendOnly) componentIngestMode; - Assert.assertEquals("DIGEST", appendOnly.digestField().get()); + Assert.assertTrue(appendOnly.digestGenStrategy() instanceof UserProvidedDigestGenStrategy); + Assert.assertEquals("DIGEST", ((UserProvidedDigestGenStrategy) appendOnly.digestGenStrategy()).digestField()); Assert.assertTrue(appendOnly.auditing() instanceof DateTimeAuditing); DateTimeAuditing dateTimeAuditing = (DateTimeAuditing) appendOnly.auditing(); Assert.assertEquals("AUDIT_TIME", dateTimeAuditing.dateTimeField()); @@ -117,7 +121,8 @@ public void testMapperForAppendOnly() throws Exception Assert.assertTrue(componentIngestMode instanceof AppendOnly); appendOnly = (AppendOnly) componentIngestMode; - Assert.assertEquals("DIGEST", appendOnly.digestField().get()); + Assert.assertTrue(appendOnly.digestGenStrategy() instanceof 
UserProvidedDigestGenStrategy); + Assert.assertEquals("DIGEST", ((UserProvidedDigestGenStrategy) appendOnly.digestGenStrategy()).digestField()); Assert.assertTrue(appendOnly.auditing() instanceof DateTimeAuditing); dateTimeAuditing = (DateTimeAuditing) appendOnly.auditing(); Assert.assertEquals("AUDIT_TIME", dateTimeAuditing.dateTimeField()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/TestFieldsToExclude.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/TestFieldsToExclude.java index 9c0025147f5..dd7c6796565 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/TestFieldsToExclude.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-test-runner/src/test/java/org/finos/legend/engine/testable/persistence/mapper/TestFieldsToExclude.java @@ -41,7 +41,7 @@ public void testFieldsToExcludeForAppendOnly() { IngestMode ingestMode = getAppendOnlyNoAuditingNoFilteringDuplicates(); Set fieldsToIgnore = ingestMode.accept(IngestModeVisitors.EXTRACT_FIELDS_TO_EXCLUDE); - Assert.assertTrue(fieldsToIgnore.isEmpty()); + Assert.assertTrue(fieldsToIgnore.contains("DIGEST")); ingestMode = getAppendOnlyNoAuditingWithFilteringDuplicates(); fieldsToIgnore = ingestMode.accept(IngestModeVisitors.EXTRACT_FIELDS_TO_EXCLUDE); @@ -50,6 +50,7 @@ public void testFieldsToExcludeForAppendOnly() ingestMode = getAppendOnlyDatetimeAuditingNoFilteringDuplicates(); fieldsToIgnore = ingestMode.accept(IngestModeVisitors.EXTRACT_FIELDS_TO_EXCLUDE); Assert.assertTrue(fieldsToIgnore.contains("AUDIT_TIME")); + Assert.assertTrue(fieldsToIgnore.contains("DIGEST")); ingestMode = getAppendOnlyDatetimeAuditingWithFilteringDuplicates(); fieldsToIgnore = 
ingestMode.accept(IngestModeVisitors.EXTRACT_FIELDS_TO_EXCLUDE);