From 130c0c275187b7d57e9a4e92a6c8b134f5fcb4ad Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Mon, 14 Aug 2023 12:23:16 +0800 Subject: [PATCH 01/57] Bug Fix: Bitemporal milestoning Derive Main schema removes the VALID_FROM/VALID_THROUGH field if the name matches with source specified VALID_FROM/VALID_THROUGH fields --- .../ingestmode/DeriveMainDatasetSchemaFromStaging.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index e209261e5b9..e606015478d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -313,9 +313,9 @@ public Void visitDateTime(ValidDateTimeAbstract validDateTime) { Field dateTimeFrom = getBatchTimeField(validDateTime.dateTimeFromName(), true); Field dateTimeThru = getBatchTimeField(validDateTime.dateTimeThruName(), false); + validDateTime.validityDerivation().accept(new EnrichSchemaWithValidityMilestoningDerivation(mainSchemaFields)); mainSchemaFields.add(dateTimeFrom); mainSchemaFields.add(dateTimeThru); - validDateTime.validityDerivation().accept(new EnrichSchemaWithValidityMilestoningDerivation(mainSchemaFields)); return null; } } From 
b25832ba10cb33d29d414d109ca3c7d41dd57df7 Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Mon, 14 Aug 2023 14:55:58 +0800 Subject: [PATCH 02/57] Bug Fix: Bitemporal milestoning Schema Evolution must ignore user provided validity fields --- .../schemaevolution/SchemaEvolution.java | 35 ++++++++++-- .../schemaevolution/SchemaEvolutionTest.java | 53 +++++++++++++++++++ ...ourceSpecifiesFromAndThroughScenarios.java | 40 ++++++++++++++ ...DeltaSourceSpecifiesFromOnlyScenarios.java | 44 +++++++++++++++ ...SpecifiesFromAndThroughDerivationTest.java | 7 +++ ...eltaSourceSpecifiesFromDerivationTest.java | 7 +++ 6 files changed, 182 insertions(+), 4 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java index 6d7f17b152c..15765b38b52 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolution.java @@ -32,6 +32,9 @@ import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoningVisitor; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidDateTimeAbstract; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidityMilestoningVisitor; +import 
org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromAndThruDateTimeAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromDateTimeAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.ValidityDerivationVisitor; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; @@ -44,6 +47,7 @@ import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -335,15 +339,15 @@ public Set visitUnitemporalDelta(UnitemporalDeltaAbstract unitemporalDel @Override public Set visitBitemporalSnapshot(BitemporalSnapshotAbstract bitemporalSnapshot) { - return Collections.emptySet(); + return bitemporalSnapshot.validityMilestoning().accept(VALIDITY_FIELDS_TO_IGNORE_IN_STAGING); } @Override public Set visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) { - return bitemporalDelta.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_FIELD) - .map(Collections::singleton) - .orElse(Collections.emptySet()); + Set fieldsToIgnore = bitemporalDelta.validityMilestoning().accept(VALIDITY_FIELDS_TO_IGNORE_IN_STAGING); + bitemporalDelta.mergeStrategy().accept(MergeStrategyVisitors.EXTRACT_DELETE_FIELD).ifPresent(fieldsToIgnore::add); + return fieldsToIgnore; } @Override @@ -463,4 +467,27 @@ public Set visitDateTime(ValidDateTimeAbstract validDateTime) return fieldsToIgnore; } }; + + private static final ValidityMilestoningVisitor> VALIDITY_FIELDS_TO_IGNORE_IN_STAGING = new ValidityMilestoningVisitor>() + { + @Override + public Set visitDateTime(ValidDateTimeAbstract 
validDateTime) + { + Set fieldsToIgnore = validDateTime.validityDerivation().accept(new ValidityDerivationVisitor>() + { + @Override + public Set visitSourceSpecifiesFromDateTime(SourceSpecifiesFromDateTimeAbstract sourceSpecifiesFromDateTime) + { + return new HashSet<>(Arrays.asList(sourceSpecifiesFromDateTime.sourceDateTimeFromField())); + } + + @Override + public Set visitSourceSpecifiesFromAndThruDateTime(SourceSpecifiesFromAndThruDateTimeAbstract sourceSpecifiesFromAndThruDateTime) + { + return new HashSet<>(Arrays.asList(sourceSpecifiesFromAndThruDateTime.sourceDateTimeFromField(), sourceSpecifiesFromAndThruDateTime.sourceDateTimeThruField())); + } + }); + return fieldsToIgnore; + } + }; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java index a1f94db7d64..071f5cc6efe 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.schemaevolution; import org.finos.legend.engine.persistence.components.IngestModeTest; +import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.NontemporalSnapshot; 
import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; @@ -26,6 +27,9 @@ import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; +import org.finos.legend.engine.persistence.components.scenarios.BitemporalDeltaSourceSpecifiesFromAndThroughScenarios; +import org.finos.legend.engine.persistence.components.scenarios.BitemporalDeltaSourceSpecifiesFromOnlyScenarios; +import org.finos.legend.engine.persistence.components.scenarios.TestScenario; import org.finos.legend.engine.persistence.components.transformer.TransformOptions; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.SchemaEvolutionCapability; @@ -682,4 +686,53 @@ void testSnapshotMilestoningWithNullableColumnMissingInStagingTable() List sqlsForSchemaEvolution = physicalPlanForSchemaEvolution.getSqlList(); Assertions.assertEquals(0, sqlsForSchemaEvolution.size()); } + + + @Test + void testBitemporalDeltaSourceSpeciesBothFieldsSchemaEvolution() + { + RelationalTransformer transformer = new RelationalTransformer(relationalSink, TransformOptions.builder().build()); + + BitemporalDeltaSourceSpecifiesFromAndThroughScenarios scenarios = new BitemporalDeltaSourceSpecifiesFromAndThroughScenarios(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + + Dataset mainTable = scenario.getMainTable(); + Dataset stagingTable = scenario.getStagingTable(); + IngestMode ingestMode = scenario.getIngestMode(); + + Set schemaEvolutionCapabilitySet = new HashSet<>(); + schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.ADD_COLUMN); + SchemaEvolution schemaEvolution = new 
SchemaEvolution(relationalSink, ingestMode, schemaEvolutionCapabilitySet); + + SchemaEvolutionResult result = schemaEvolution.buildLogicalPlanForSchemaEvolution(mainTable, stagingTable); + SqlPlan physicalPlanForSchemaEvolution = transformer.generatePhysicalPlan(result.logicalPlan()); + + // Use the planner utils to return the sql + List sqlsForSchemaEvolution = physicalPlanForSchemaEvolution.getSqlList(); + Assertions.assertTrue(sqlsForSchemaEvolution.isEmpty()); + } + + @Test + void testBitemporalDeltaSourceSpeciesFromOnlyFieldsSchemaEvolution() + { + RelationalTransformer transformer = new RelationalTransformer(relationalSink, TransformOptions.builder().build()); + + BitemporalDeltaSourceSpecifiesFromOnlyScenarios scenarios = new BitemporalDeltaSourceSpecifiesFromOnlyScenarios(); + TestScenario scenario = scenarios.BATCH_ID_BASED__NO_DEL_IND__NO_DATA_SPLITS(); + + Dataset mainTable = scenario.getDatasets().mainDataset(); + Dataset stagingTable = scenario.getDatasets().stagingDataset(); + IngestMode ingestMode = scenario.getIngestMode(); + + Set schemaEvolutionCapabilitySet = new HashSet<>(); + schemaEvolutionCapabilitySet.add(SchemaEvolutionCapability.ADD_COLUMN); + SchemaEvolution schemaEvolution = new SchemaEvolution(relationalSink, ingestMode, schemaEvolutionCapabilitySet); + + SchemaEvolutionResult result = schemaEvolution.buildLogicalPlanForSchemaEvolution(mainTable, stagingTable); + SqlPlan physicalPlanForSchemaEvolution = transformer.generatePhysicalPlan(result.logicalPlan()); + + // Use the planner utils to return the sql + List sqlsForSchemaEvolution = physicalPlanForSchemaEvolution.getSqlList(); + Assertions.assertTrue(sqlsForSchemaEvolution.isEmpty()); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java index d687618b2f0..020a4b3d524 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromAndThroughScenarios.java @@ -23,6 +23,9 @@ import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromAndThruDateTime; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import java.util.Arrays; import java.util.Optional; @@ -135,4 +138,41 @@ public TestScenario DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS() .build(); return new TestScenario(mainTableWithBitemporalSchemaWithDateTime, stagingTableWithBitemporalSchemaWithDeleteIndicatorAndDataSplit, ingestMode); } + + public TestScenario BATCH_ID_BASED__VALIDITY_FIELDS_SAME_NAME() + { + BitemporalDelta ingestMode = BitemporalDelta.builder() + .digestField(digestField) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInField) + 
.batchIdOutName(batchIdOutField) + .build()) + .validityMilestoning(ValidDateTime.builder() + .dateTimeFromName(validityFromReferenceField) + .dateTimeThruName(validityThroughReferenceField) + .validityDerivation(SourceSpecifiesFromAndThruDateTime.builder() + .sourceDateTimeFromField(validityFromReferenceField) + .sourceDateTimeThruField(validityThroughReferenceField) + .build()) + .build()) + .build(); + + SchemaDefinition bitempSchema = SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(amount) + .addFields(digest) + .addFields(batchIdIn) + .addFields(batchIdOut) + .addFields(validityFromReference) + .addFields(validityThroughReference) + .build(); + + Dataset mainTableWithBitemporalSchema = DatasetDefinition.builder() + .database(mainDbName).name(mainTableName).alias(mainTableAlias) + .schema(bitempSchema) + .build(); + + return new TestScenario(mainTableWithBitemporalSchema, stagingTableWithBitemporalSchema, ingestMode); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java index f2bc82539ff..7cf1d886f6c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/BitemporalDeltaSourceSpecifiesFromOnlyScenarios.java @@ -24,7 +24,9 @@ import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.ValidDateTime; import org.finos.legend.engine.persistence.components.ingestmode.validitymilestoning.derivation.SourceSpecifiesFromDateTime; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import java.util.Arrays; import java.util.Optional; @@ -341,4 +343,46 @@ public TestScenario DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS() .build()); return testScenario; } + + public TestScenario BATCH_ID_BASED__VALIDITY_FIELDS_SAME_NAME() + { + BitemporalDelta ingestMode = BitemporalDelta.builder() + .digestField(digestField) + .transactionMilestoning(BatchId.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .build()) + .validityMilestoning(ValidDateTime.builder() + .dateTimeFromName(validityFromReferenceField) + .dateTimeThruName(validityThroughReferenceField) + .validityDerivation(SourceSpecifiesFromDateTime.builder() + .sourceDateTimeFromField(validityFromReferenceField) + .build()) + .build()) + .build(); + + SchemaDefinition bitempSchema = SchemaDefinition.builder() + .addFields(id) + .addFields(name) + .addFields(amount) + .addFields(digest) + .addFields(batchIdIn) + .addFields(batchIdOut) + .addFields(validityFromReference) + .addFields(validityThroughReference) + .build(); + + Dataset mainTableWithBitemporalSchema = DatasetDefinition.builder() + 
.database(mainDbName).name(mainTableName).alias(mainTableAlias) + .schema(bitempSchema) + .build(); + + TestScenario testScenario = new TestScenario(ingestMode); + testScenario.setDatasets(Datasets.builder() + .mainDataset(mainTableWithBitemporalSchema) + .stagingDataset(stagingTableWithBitemporalFromOnlySchema) + .tempDataset(tempTableWithBitemporalFromOnlySchema) + .build()); + return testScenario; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromAndThroughDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromAndThroughDerivationTest.java index 265bb341ae5..1266389a2b7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromAndThroughDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromAndThroughDerivationTest.java @@ -51,4 +51,11 @@ void testBitemporalDeltaDateTimeBasedWithDeleteIndWithDataSplits() TestScenario scenario = scenarios.DATETIME_BASED__WITH_DEL_IND__WITH_DATA_SPLITS(); assertDerivedMainDataset(scenario); } + + @Test + void testBitemporalDeltaWithValidityFieldsHavingSameName() + { + TestScenario scenario = 
scenarios.BATCH_ID_BASED__VALIDITY_FIELDS_SAME_NAME(); + assertDerivedMainDataset(scenario); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromDerivationTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromDerivationTest.java index 16a971d2e0d..d30db4dbec7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromDerivationTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/derivation/BitemporalDeltaSourceSpecifiesFromDerivationTest.java @@ -93,4 +93,11 @@ void testBitemporalDeltaDateTimeBasedNoDeleteIndNoDataSplits() TestScenario scenario = scenarios.DATETIME_BASED__NO_DEL_IND__NO_DATA_SPLITS(); assertDerivedMainDataset(scenario); } + + @Test + void testBitemporalDeltaWithValidityFieldsHavingSameName() + { + TestScenario scenario = scenarios.BATCH_ID_BASED__VALIDITY_FIELDS_SAME_NAME(); + assertDerivedMainDataset(scenario); + } } From f4d6186e51e61a288fdb5dbb5628af944af3e569 Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Wed, 16 Aug 2023 12:44:34 +0800 Subject: [PATCH 03/57] Adding code for concurrent safety feature --- .../components/common/DatasetsAbstract.java | 3 + .../ingestmode/BulkLoadAbstract.java | 2 + 
.../DeriveMainDatasetSchemaFromStaging.java | 9 +++ .../ingestmode/IngestModeCaseConverter.java | 1 + .../datasets/DatasetCaseConverter.java | 29 ++++++++ ...seConverter.java => DatasetsEnricher.java} | 54 +++++++++++--- .../MetadataDatasetCaseConverter.java | 37 ---------- .../StagedFilesDatasetProperties.java | 4 + .../components/planner/AppendOnlyPlanner.java | 4 + .../planner/BitemporalDeltaPlanner.java | 4 + .../planner/BitemporalSnapshotPlanner.java | 4 + .../components/planner/BulkLoadPlanner.java | 18 ++++- .../planner/NontemporalDeltaPlanner.java | 4 + .../planner/NontemporalSnapshotPlanner.java | 4 + .../components/planner/Planner.java | 34 +++++++++ .../planner/UnitemporalDeltaPlanner.java | 4 + .../planner/UnitemporalSnapshotPlanner.java | 4 + .../util/LockInfoDatasetAbstract.java | 69 ++++++++++++++++++ .../components/util/LockInfoUtils.java | 60 +++++++++++++++ .../components/util/LockInfoUtilsTest.java | 67 +++++++++++++++++ .../MetadataDatasetCaseConverterTest.java | 4 +- .../components/relational/api/ApiUtils.java | 10 +-- .../api/GeneratorResultAbstract.java | 14 ++++ .../api/RelationalGeneratorAbstract.java | 28 ++++++- .../api/RelationalIngestorAbstract.java | 28 ++++++- ...eStagedFilesDatasetPropertiesAbstract.java | 2 - .../components/ingestmode/BulkLoadTest.java | 73 ++++++++++++++++++- 27 files changed, 508 insertions(+), 66 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/{DatasetsCaseConverter.java => DatasetsEnricher.java} (59%) delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/MetadataDatasetCaseConverter.java create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java index 0de9e4bd3c0..847c88f16c6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; +import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.immutables.value.Value.Immutable; import org.immutables.value.Value.Parameter; import org.immutables.value.Value.Style; @@ -45,4 +46,6 @@ public 
interface DatasetsAbstract Optional tempDatasetWithDeleteIndicator(); Optional stagingDatasetWithoutDuplicates(); + + Optional lockInfoDataset(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java index d74404b47a9..493def6e34e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java @@ -34,6 +34,8 @@ public interface BulkLoadAbstract extends IngestMode Optional digestField(); + Optional lineageField(); + Auditing auditing(); @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index e606015478d..107d84f33a6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -146,6 +146,15 @@ public Dataset visitBulkLoad(BulkLoadAbstract bulkLoad) addDigestField(mainSchemaFields, bulkLoad.digestField().get()); } bulkLoad.auditing().accept(new EnrichSchemaWithAuditing(mainSchemaFields, false)); + if (bulkLoad.lineageField().isPresent()) + { + Field lineageField = Field.builder() + .name(bulkLoad.lineageField().get()) + .type(FieldType.of(DataType.VARCHAR, Optional.empty(), Optional.empty())) + .primaryKey(false) + .build(); + mainSchemaFields.add(lineageField); + } return mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.addAllFields(mainSchemaFields).build()).build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java index 932aabbd05c..6bfba0bd509 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java @@ -162,6 +162,7 @@ public IngestMode visitBulkLoad(BulkLoadAbstract bulkLoad) .digestField(applyCase(bulkLoad.digestField())) .digestUdfName(bulkLoad.digestUdfName()) 
.generateDigest(bulkLoad.generateDigest()) + .lineageField(applyCase(bulkLoad.lineageField())) .auditing(bulkLoad.auditing().accept(new AuditingCaseConverter())) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java index 843f27d2636..3803820d032 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java @@ -14,6 +14,9 @@ package org.finos.legend.engine.persistence.components.logicalplan.datasets; +import org.finos.legend.engine.persistence.components.util.LockInfoDataset; +import org.finos.legend.engine.persistence.components.util.MetadataDataset; + import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -132,4 +135,30 @@ public Dataset applyCaseOnDataset(Dataset dataset, Function stra throw new UnsupportedOperationException("Unsupported Dataset Conversion"); } + + public MetadataDataset applyCaseOnMetadataDataset(MetadataDataset metadataDataset, Function strategy) + { + return MetadataDataset.builder() + .metadataDatasetDatabaseName(metadataDataset.metadataDatasetDatabaseName().map(strategy)) + .metadataDatasetGroupName(metadataDataset.metadataDatasetGroupName().map(strategy)) + 
.metadataDatasetName(strategy.apply(metadataDataset.metadataDatasetName())) + .tableNameField(strategy.apply(metadataDataset.tableNameField())) + .batchStartTimeField(strategy.apply(metadataDataset.batchStartTimeField())) + .batchEndTimeField(strategy.apply(metadataDataset.batchEndTimeField())) + .batchStatusField(strategy.apply(metadataDataset.batchStatusField())) + .tableBatchIdField(strategy.apply(metadataDataset.tableBatchIdField())) + .stagingFiltersField(strategy.apply(metadataDataset.stagingFiltersField())) + .build(); + } + + public LockInfoDataset applyCaseOnLockInfoDataset(LockInfoDataset lockInfoDataset, Function strategy) + { + return LockInfoDataset.builder() + .database(lockInfoDataset.database().map(strategy)) + .group(lockInfoDataset.group().map(strategy)) + .name(strategy.apply(lockInfoDataset.name())) + .insertTimeField(strategy.apply(lockInfoDataset.insertTimeField())) + .lastUsedTimeField(strategy.apply(lockInfoDataset.lastUsedTimeField())) + .build(); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsEnricher.java similarity index 59% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsEnricher.java index 5bd42b020dc..8d91068bbfe 100644 
--- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsEnricher.java @@ -15,23 +15,42 @@ package org.finos.legend.engine.persistence.components.logicalplan.datasets; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import java.util.Optional; import java.util.function.Function; -public class DatasetsCaseConverter +public class DatasetsEnricher { DatasetCaseConverter datasetCaseConverter = new DatasetCaseConverter(); - MetadataDatasetCaseConverter metadataDatasetCaseConverter = new MetadataDatasetCaseConverter(); + private static final String LOCK_INFO_DATASET_SUFFIX = "_legend_persistence_lock"; - public Datasets applyCaseOnDatasets(Datasets datasets, Function strategy) + public Datasets enrichAndApplyCase(Datasets datasets, Function strategy) { Dataset main = datasetCaseConverter.applyCaseOnDataset(datasets.mainDataset(), strategy); Dataset staging = datasetCaseConverter.applyCaseOnDataset(datasets.stagingDataset(), strategy); Optional temp = datasets.tempDataset().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional tempWithDeleteIndicator = datasets.tempDatasetWithDeleteIndicator().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional stagingWithoutDuplicates = datasets.stagingDatasetWithoutDuplicates().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); + MetadataDataset metadataDataset = 
getMetadataDataset(datasets); + LockInfoDataset lockInfoDataset = getLockInfoDataset(datasets, main); + Optional metadata = Optional.ofNullable(datasetCaseConverter.applyCaseOnMetadataDataset(metadataDataset, strategy)); + Optional lockInfo = Optional.ofNullable(datasetCaseConverter.applyCaseOnLockInfoDataset(lockInfoDataset, strategy)); + + return Datasets.builder() + .mainDataset(main) + .stagingDataset(staging) + .tempDataset(temp) + .tempDatasetWithDeleteIndicator(tempWithDeleteIndicator) + .stagingDatasetWithoutDuplicates(stagingWithoutDuplicates) + .metadataDataset(metadata) + .lockInfoDataset(lockInfo) + .build(); + } + + private MetadataDataset getMetadataDataset(Datasets datasets) + { MetadataDataset metadataset; if (datasets.metadataDataset().isPresent()) { @@ -41,15 +60,26 @@ public Datasets applyCaseOnDatasets(Datasets datasets, Function { metadataset = MetadataDataset.builder().build(); } + return metadataset; + } - Optional metadata = Optional.ofNullable(metadataDatasetCaseConverter.applyCaseOnMetadataDataset(metadataset, strategy)); - return Datasets.builder() - .mainDataset(main) - .stagingDataset(staging) - .tempDataset(temp) - .tempDatasetWithDeleteIndicator(tempWithDeleteIndicator) - .stagingDatasetWithoutDuplicates(stagingWithoutDuplicates) - .metadataDataset(metadata) - .build(); + private LockInfoDataset getLockInfoDataset(Datasets datasets, Dataset main) + { + LockInfoDataset lockInfoDataset; + if (datasets.lockInfoDataset().isPresent()) + { + lockInfoDataset = datasets.lockInfoDataset().get(); + } + else + { + String datasetName = main.datasetReference().name().orElseThrow(IllegalStateException::new); + String lockDatasetName = datasetName + LOCK_INFO_DATASET_SUFFIX; + lockInfoDataset = LockInfoDataset.builder() + .database(main.datasetReference().database()) + .group(main.datasetReference().group()) + .name(lockDatasetName) + .build(); + } + return lockInfoDataset; } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/MetadataDatasetCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/MetadataDatasetCaseConverter.java deleted file mode 100644 index 05805889c40..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/MetadataDatasetCaseConverter.java +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package org.finos.legend.engine.persistence.components.logicalplan.datasets; - -import org.finos.legend.engine.persistence.components.util.MetadataDataset; - -import java.util.function.Function; - -public class MetadataDatasetCaseConverter -{ - public MetadataDataset applyCaseOnMetadataDataset(MetadataDataset metadataDataset, Function strategy) - { - return MetadataDataset.builder() - .metadataDatasetDatabaseName(metadataDataset.metadataDatasetDatabaseName().map(strategy)) - .metadataDatasetGroupName(metadataDataset.metadataDatasetGroupName().map(strategy)) - .metadataDatasetName(strategy.apply(metadataDataset.metadataDatasetName())) - .tableNameField(strategy.apply(metadataDataset.tableNameField())) - .batchStartTimeField(strategy.apply(metadataDataset.batchStartTimeField())) - .batchEndTimeField(strategy.apply(metadataDataset.batchEndTimeField())) - .batchStatusField(strategy.apply(metadataDataset.batchStatusField())) - .tableBatchIdField(strategy.apply(metadataDataset.tableBatchIdField())) - .stagingFiltersField(strategy.apply(metadataDataset.stagingFiltersField())) - .build(); - } -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java index 806d8198cf9..bcf38bd154f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java @@ -14,8 +14,12 @@ package org.finos.legend.engine.persistence.components.logicalplan.datasets; +import java.util.List; + public interface StagedFilesDatasetProperties extends DatasetReference { + List files(); + default StagedFilesDatasetProperties datasetReference() { return this; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java index c0fb69399ce..21a21628db3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java @@ -122,6 +122,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { operations.add(Create.of(true, stagingDataset())); } + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } return LogicalPlan.of(operations); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java index fe6c7e7be97..99838078718 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java @@ -272,6 +272,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) operations.add(Create.of(true, stagingDataset)); } } + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java index 45076cab36c..854d0cceeb4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java @@ -97,6 +97,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) operations.add(Create.of(true, stagingDataset())); } operations.add(Create.of(true, metadataDataset().orElseThrow(IllegalStateException::new).get())); + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index ab837ee8c15..dae4d51dd6c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -24,9 +24,10 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.logicalplan.values.All; +import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetAbstract; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; @@ -45,15 +46,20 @@ class BulkLoadPlanner extends Planner { + + private StagedFilesDataset stagedFilesDataset; + BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions) { super(datasets, ingestMode, plannerOptions); // validation - if (!(datasets.stagingDataset() instanceof StagedFilesDatasetAbstract)) + if (!(datasets.stagingDataset() instanceof StagedFilesDataset)) { throw new IllegalArgumentException("Only StagedFilesDataset are allowed under Bulk Load"); } + + stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); } @Override @@ -90,6 +96,14 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set files = stagedFilesDataset.stagedFilesDatasetProperties().files(); + String lineageValue = String.join(",", files); + fieldsToSelect.add(StringValue.of(lineageValue)); + } + Dataset selectStage = Selection.builder().source(stagingDataset()).addAllFields(fieldsToSelect).build(); return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert))); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java index 07dec197d57..fe4d9638fba 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java @@ -307,6 +307,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { operations.add(Create.of(true, stagingDataset())); } + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java index d81447aba82..a0424f612f7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java @@ 
-132,6 +132,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { operations.add(Create.of(true, stagingDataset())); } + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 2b1c4b8a8c7..5943de07898 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -30,7 +30,10 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestampAbstract; import org.finos.legend.engine.persistence.components.util.Capability; +import org.finos.legend.engine.persistence.components.util.LockInfoDataset; +import org.finos.legend.engine.persistence.components.util.LockInfoUtils; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import org.finos.legend.engine.persistence.components.util.MetadataDataset; @@ -87,6 +90,12 @@ default boolean 
createStagingDataset() { return false; } + + @Default + default boolean enableConcurrentSafety() + { + return false; + } } private final Datasets datasets; @@ -123,6 +132,11 @@ protected Optional metadataDataset() return datasets.metadataDataset(); } + protected Optional lockInfoDataset() + { + return datasets.lockInfoDataset(); + } + protected IngestMode ingestMode() { return ingestMode; @@ -140,6 +154,26 @@ public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) return null; } + public LogicalPlan buildLogicalPlanForInitializeLock(Resources resources) + { + if (options().enableConcurrentSafety()) + { + LockInfoUtils lockInfoUtils = new LockInfoUtils(datasets.lockInfoDataset().orElseThrow(IllegalStateException::new)); + return LogicalPlan.of(Collections.singleton(lockInfoUtils.initializeLockInfo(BatchStartTimestampAbstract.INSTANCE))); + } + return null; + } + + public LogicalPlan buildLogicalPlanForAcquireLock(Resources resources) + { + if (options().enableConcurrentSafety()) + { + LockInfoUtils lockInfoUtils = new LockInfoUtils(datasets.lockInfoDataset().orElseThrow(IllegalStateException::new)); + return LogicalPlan.of(Collections.singleton(lockInfoUtils.updateLockInfo(BatchStartTimestampAbstract.INSTANCE))); + } + return null; + } + public abstract LogicalPlan buildLogicalPlanForPreActions(Resources resources); public LogicalPlan buildLogicalPlanForPostActions(Resources resources) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java index 0bde27bd9bd..b93ef293767 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java @@ -123,6 +123,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) operations.add(Create.of(true, stagingDataset())); } operations.add(Create.of(true, metadataDataset().orElseThrow(IllegalStateException::new).get())); + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index b02979298f9..15464149218 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -98,6 +98,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) operations.add(Create.of(true, stagingDataset())); } 
operations.add(Create.of(true, metadataDataset().orElseThrow(IllegalStateException::new).get())); + if (options().enableConcurrentSafety()) + { + operations.add(Create.of(true, lockInfoDataset().orElseThrow(IllegalStateException::new).get())); + } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java new file mode 100644 index 00000000000..296bf1b3a3c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java @@ -0,0 +1,69 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; +import org.immutables.value.Value; +import java.util.Optional; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface LockInfoDatasetAbstract +{ + + Optional database(); + + Optional group(); + + String name(); + + @Value.Default + default String insertTimeField() + { + return "insert_ts_utc"; + } + + @Value.Default + default String lastUsedTimeField() + { + return "last_used_ts_utc"; + } + + + @Value.Derived + default Dataset get() + { + return DatasetDefinition.builder() + .database(database()) + .group(group()) + .name(name()) + .schema(SchemaDefinition.builder() + .addFields(Field.builder().name(insertTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) + .addFields(Field.builder().name(lastUsedTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) + .build()) + .build(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java new file mode 100644 index 00000000000..a4aafdf3b39 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java @@ -0,0 +1,60 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Exists; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Not; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetReference; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Update; +import org.finos.legend.engine.persistence.components.logicalplan.values.*; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class LockInfoUtils +{ + private final LockInfoDataset lockInfoDataset; + private final Dataset dataset; + + public LockInfoUtils(LockInfoDataset lockInfoDataset) + { + this.lockInfoDataset = lockInfoDataset; + this.dataset = lockInfoDataset.get(); + } + + public Insert initializeLockInfo(BatchStartTimestamp batchStartTimestamp) + { + DatasetReference metaTableRef = this.dataset.datasetReference(); + FieldValue insertTimeField = FieldValue.builder().datasetRef(metaTableRef).fieldName(lockInfoDataset.insertTimeField()).build(); + List insertFields = Arrays.asList(insertTimeField); + List selectFields = Arrays.asList(batchStartTimestamp); + Condition condition = Not.of(Exists.of(Selection.builder().addFields(All.INSTANCE).source(dataset).build())); + return Insert.of(dataset, Selection.builder().addAllFields(selectFields).condition(condition).build(), insertFields); + } + + public Update updateLockInfo(BatchStartTimestamp batchStartTimestamp) + { + List> keyValuePairs = new ArrayList<>(); + 
keyValuePairs.add(Pair.of(FieldValue.builder().datasetRef(dataset.datasetReference()).fieldName(lockInfoDataset.insertTimeField()).build(), batchStartTimestamp)); + Update update = Update.builder().dataset(dataset).addAllKeyValuePairs(keyValuePairs).build(); + return update; + } + +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java new file mode 100644 index 00000000000..c79fd8b5616 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java @@ -0,0 +1,67 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Update; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; +import org.finos.legend.engine.persistence.components.relational.SqlPlan; +import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; +import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; +import org.finos.legend.engine.persistence.components.transformer.TransformOptions; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.time.Clock; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.List; + +public class LockInfoUtilsTest +{ + + private final ZonedDateTime executionZonedDateTime = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); + private final TransformOptions transformOptions = TransformOptions.builder().executionTimestampClock(Clock.fixed(executionZonedDateTime.toInstant(), ZoneOffset.UTC)).build(); + + private LockInfoDataset lockInfoDataset = LockInfoDataset.builder().name("main_table_lock").build(); + + + @Test + public void testInitializeLockInfo() + { + LockInfoUtils store = new LockInfoUtils(lockInfoDataset); + Insert operation = store.initializeLockInfo(BatchStartTimestamp.INSTANCE); + RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + String expectedSql = "INSERT INTO main_table_lock (\"insert_ts_utc\") (SELECT '2000-01-01 00:00:00' WHERE NOT (EXISTS (SELECT * 
FROM main_table_lock as main_table_lock)))"; + Assertions.assertEquals(expectedSql, list.get(0)); + } + + @Test + public void testUpdateMetaStore() + { + LockInfoUtils store = new LockInfoUtils(lockInfoDataset); + Update operation = store.updateLockInfo(BatchStartTimestamp.INSTANCE); + RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + String expectedSql = "UPDATE main_table_lock as main_table_lock SET main_table_lock.\"insert_ts_utc\" = '2000-01-01 00:00:00'"; + Assertions.assertEquals(expectedSql, list.get(0)); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataDatasetCaseConverterTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataDatasetCaseConverterTest.java index f5ec1e22884..2790db354c7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataDatasetCaseConverterTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/MetadataDatasetCaseConverterTest.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.util; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.MetadataDatasetCaseConverter; +import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetCaseConverter; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -35,7 +35,7 @@ public class MetadataDatasetCaseConverterTest @Test public void testMetadataDatasetCaseConverter() { - MetadataDatasetCaseConverter converter = new MetadataDatasetCaseConverter(); + DatasetCaseConverter converter = new DatasetCaseConverter(); MetadataDataset metadataDataset = MetadataDataset.builder() .metadataDatasetDatabaseName(databaseName) .metadataDatasetGroupName(groupName) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java index e229533c82b..30435501213 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java @@ -19,7 +19,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.IngestModeCaseConverter; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsCaseConverter; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsEnricher; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.relational.CaseConversion; @@ -38,16 +38,16 @@ public static Dataset deriveMainDatasetFromStaging(Datasets datasets, IngestMode return mainDataset; } - public static Datasets applyCase(Datasets datasets, CaseConversion caseConversion) + public static Datasets enrichAndApplyCase(Datasets datasets, CaseConversion caseConversion) { - DatasetsCaseConverter converter = new DatasetsCaseConverter(); + DatasetsEnricher converter = new DatasetsEnricher(); if (caseConversion == CaseConversion.TO_UPPER) { - return converter.applyCaseOnDatasets(datasets, String::toUpperCase); + return converter.enrichAndApplyCase(datasets, String::toUpperCase); } if (caseConversion == CaseConversion.TO_LOWER) { - return converter.applyCaseOnDatasets(datasets, String::toLowerCase); + return converter.enrichAndApplyCase(datasets, String::toLowerCase); } return datasets; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 9b7e4ad8576..847ea4b7f15 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -42,6 +42,10 @@ public abstract 
class GeneratorResultAbstract public abstract SqlPlan preActionsSqlPlan(); + public abstract Optional initializeLockSqlPlan(); + + public abstract Optional acquireLockSqlPlan(); + public abstract Optional schemaEvolutionSqlPlan(); public abstract Optional schemaEvolutionDataset(); @@ -63,6 +67,16 @@ public List preActionsSql() return preActionsSqlPlan().getSqlList(); } + public List initializeLockSql() + { + return initializeLockSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); + } + + public List acquireLockSql() + { + return acquireLockSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); + } + public List schemaEvolutionSql() { return schemaEvolutionSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index e8609365a9c..df4bafa3df2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -94,6 +94,12 @@ public boolean createStagingDataset() return false; } + @Default + public boolean enableConcurrentSafety() + { + return false; + } + public abstract Set schemaEvolutionCapabilitySet(); public 
abstract Optional batchStartTimestampPattern(); @@ -118,6 +124,7 @@ protected PlannerOptions plannerOptions() .collectStatistics(collectStatistics()) .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) + .enableConcurrentSafety(enableConcurrentSafety()) .build(); } @@ -164,7 +171,7 @@ public List generateOperationsWithDataSplits(Datasets datasets, GeneratorResult generateOperations(Datasets datasets, Resources resources) { IngestMode ingestModeWithCaseConversion = ApiUtils.applyCase(ingestMode(), caseConversion()); - Datasets datasetsWithCaseConversion = ApiUtils.applyCase(datasets, caseConversion()); + Datasets datasetsWithCaseConversion = ApiUtils.enrichAndApplyCase(datasets, caseConversion()); Dataset enrichedMainDataset = ApiUtils.deriveMainDatasetFromStaging(datasetsWithCaseConversion, ingestModeWithCaseConversion); Datasets enrichedDatasets = datasetsWithCaseConversion.withMainDataset(enrichedMainDataset); Planner planner = Planners.get(enrichedDatasets, ingestModeWithCaseConversion, plannerOptions()); @@ -187,6 +194,23 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan preActionsLogicalPlan = planner.buildLogicalPlanForPreActions(resources); SqlPlan preActionsSqlPlan = transformer.generatePhysicalPlan(preActionsLogicalPlan); + // initialize-lock + LogicalPlan initializeLockLogicalPlan = planner.buildLogicalPlanForInitializeLock(resources); + Optional initializeLockSqlPlan = Optional.empty(); + if (initializeLockLogicalPlan != null) + { + initializeLockSqlPlan = Optional.of(transformer.generatePhysicalPlan(initializeLockLogicalPlan)); + } + + // acquire-lock + LogicalPlan acquireLockLogicalPlan = planner.buildLogicalPlanForAcquireLock(resources); + Optional acquireLockSqlPlan = Optional.empty(); + if (acquireLockLogicalPlan != null) + { + acquireLockSqlPlan = Optional.of(transformer.generatePhysicalPlan(acquireLockLogicalPlan)); + } + + // schema evolution Optional 
schemaEvolutionSqlPlan = Optional.empty(); Optional schemaEvolutionDataset = Optional.empty(); @@ -229,6 +253,8 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann return GeneratorResult.builder() .preActionsSqlPlan(preActionsSqlPlan) + .initializeLockSqlPlan(initializeLockSqlPlan) + .acquireLockSqlPlan(acquireLockSqlPlan) .schemaEvolutionSqlPlan(schemaEvolutionSqlPlan) .schemaEvolutionDataset(schemaEvolutionDataset) .ingestSqlPlan(ingestSqlPlan) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index b9bfd975040..cc702e67b82 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -143,6 +143,12 @@ public Clock executionTimestampClock() return Clock.systemUTC(); } + @Default + public boolean enableConcurrentSafety() + { + return false; + } + @Default public Set schemaEvolutionCapabilitySet() { @@ -163,6 +169,7 @@ protected PlannerOptions plannerOptions() .collectStatistics(collectStatistics()) .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) + .enableConcurrentSafety(enableConcurrentSafety()) .build(); } @@ -208,6 +215,7 @@ public Datasets 
create(Datasets datasets) { init(datasets); createAllDatasets(); + initializeLock(); return this.enrichedDatasets; } @@ -296,6 +304,22 @@ private void createAllDatasets() executor.executePhysicalPlan(generatorResult.preActionsSqlPlan()); } + private void initializeLock() + { + if (enableConcurrentSafety()) + { + executor.executePhysicalPlan(generatorResult.initializeLockSqlPlan().orElseThrow(IllegalStateException::new)); + } + } + + private void acquireLock() + { + if (enableConcurrentSafety()) + { + executor.executePhysicalPlan(generatorResult.acquireLockSqlPlan().orElseThrow(IllegalStateException::new)); + } + } + private List ingest(List dataSplitRanges) { if (enrichedIngestMode instanceof BulkLoad) @@ -318,6 +342,7 @@ private List performFullIngestion(RelationalConnection connectio if (createDatasets()) { createAllDatasets(); + initializeLock(); } // Evolve Schema @@ -353,7 +378,7 @@ private void init(Datasets datasets) } // 1. Case handling enrichedIngestMode = ApiUtils.applyCase(ingestMode(), caseConversion()); - enrichedDatasets = ApiUtils.applyCase(datasets, caseConversion()); + enrichedDatasets = ApiUtils.enrichAndApplyCase(datasets, caseConversion()); // 2. Initialize transformer transformer = new RelationalTransformer(relationalSink(), transformOptions()); @@ -414,6 +439,7 @@ private List performIngestion(Datasets datasets, Transformer results = new ArrayList<>(); int dataSplitIndex = 0; int dataSplitsCount = (dataSplitRanges == null || dataSplitRanges.isEmpty()) ? 0 : dataSplitRanges.size(); + acquireLock(); do { Optional dataSplitRange = Optional.ofNullable(dataSplitsCount == 0 ? 
null : dataSplitRanges.get(dataSplitIndex)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java index 70807e11a5f..c3f190f6b2e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/logicalplan/datasets/SnowflakeStagedFilesDatasetPropertiesAbstract.java @@ -33,7 +33,5 @@ public interface SnowflakeStagedFilesDatasetPropertiesAbstract extends StagedFil { String location(); - List files(); - Optional fileFormat(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 46effc1d968..ed9127b2e30 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -186,6 +186,7 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() .generateDigest(true) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .digestUdfName("LAKEHOUSE_MD5") + .lineageField("lake_lineage") .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -216,12 +217,16 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() List ingestSql = operations.ingestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY,\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"APPEND_TIME\" DATETIME)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY," + + "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"APPEND_TIME\" DATETIME,\"LAKE_LINEAGE\" VARCHAR)"; String expectedIngestSql = "COPY INTO \"MY_DB\".\"MY_NAME\" " + - "(\"COL_INT\", \"COL_INTEGER\", \"DIGEST\", \"APPEND_TIME\") " + - "FROM (SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\"," + + "(\"COL_INT\", \"COL_INTEGER\", \"DIGEST\", \"APPEND_TIME\", \"LAKE_LINEAGE\") " + + "FROM " + + "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('COL_INT',legend_persistence_stage.$1,'COL_INTEGER',legend_persistence_stage.$2))," + - "'2000-01-01 00:00:00' FROM my_location (FILE_FORMAT => 
'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + + "'2000-01-01 00:00:00','/path/xyz/file1.csv,/path/xyz/file2.csv' " + + "FROM my_location (FILE_FORMAT => 'my_file_format', " + + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -306,4 +311,64 @@ public void testBulkLoadStagedFilesDatasetNotProvided() Assertions.assertTrue(e.getMessage().contains("Only StagedFilesDataset are allowed under Bulk Load")); } } + + @Test + public void testBulkLoadWithDigestAndLineage() + { + BulkLoad bulkLoad = BulkLoad.builder() + .digestField("digest") + .generateDigest(true) + .digestUdfName("LAKEHOUSE_UDF") + .lineageField("lake_lineage") + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + SnowflakeStagedFilesDatasetProperties.builder() + .location("my_location") + .fileFormat("my_file_format") + .addAllFiles(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(SnowflakeSink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS 
\"my_db\".\"my_name\"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER,\"digest\" VARCHAR,\"append_time\" DATETIME,\"lake_lineage\" VARCHAR)"; + + String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + + "(\"col_int\", \"col_integer\", \"digest\", \"append_time\", \"lake_lineage\") " + + "FROM " + + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\"," + + "LAKEHOUSE_UDF(OBJECT_CONSTRUCT('col_int',legend_persistence_stage.$1,'col_integer',legend_persistence_stage.$2))," + + "'2000-01-01 00:00:00','/path/xyz/file1.csv,/path/xyz/file2.csv' " + + "FROM my_location (FILE_FORMAT => 'my_file_format', " + + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + + "on_error = 'ABORT_STATEMENT'"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + + Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as \"rowsUpdated\"", statsSql.get(ROWS_UPDATED)); + Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + } } From f43bd95c6b0efd793cd8c4281b56653ebcf8a74d Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Wed, 16 Aug 2023 17:10:47 +0800 Subject: [PATCH 04/57] Adding test for Multi Ingest Mode with concurrent Safety --- ...richer.java => DatasetsCaseConverter.java} | 45 +------ .../components/relational/api/ApiUtils.java | 52 +++++++- .../api/RelationalIngestorAbstract.java | 8 +- 
.../ingestmode/mixed/MixedIngestModeTest.java | 116 ++++++++++++++++++ .../unitemporal/MultiTableIngestionTest.java | 2 +- .../data/mixed/input/staging_data_pass1.csv | 3 + .../data/mixed/input/staging_data_pass2.csv | 3 + .../data/mixed/input/staging_data_pass3.csv | 3 + .../data/mixed/output/expected_pass1.csv | 3 + .../data/mixed/output/expected_pass2.csv | 5 + .../data/mixed/output/expected_pass3.csv | 7 ++ 11 files changed, 198 insertions(+), 49 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/{DatasetsEnricher.java => DatasetsCaseConverter.java} (57%) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass3.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass2.csv create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass3.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsEnricher.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java similarity index 57% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsEnricher.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java index 8d91068bbfe..ca6d79e8d38 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsEnricher.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java @@ -21,22 +21,19 @@ import java.util.Optional; import java.util.function.Function; -public class DatasetsEnricher +public class DatasetsCaseConverter { DatasetCaseConverter datasetCaseConverter = new DatasetCaseConverter(); - private static final String LOCK_INFO_DATASET_SUFFIX = "_legend_persistence_lock"; - public Datasets 
enrichAndApplyCase(Datasets datasets, Function strategy) + public Datasets applyCase(Datasets datasets, Function strategy) { Dataset main = datasetCaseConverter.applyCaseOnDataset(datasets.mainDataset(), strategy); Dataset staging = datasetCaseConverter.applyCaseOnDataset(datasets.stagingDataset(), strategy); Optional temp = datasets.tempDataset().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional tempWithDeleteIndicator = datasets.tempDatasetWithDeleteIndicator().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional stagingWithoutDuplicates = datasets.stagingDatasetWithoutDuplicates().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); - MetadataDataset metadataDataset = getMetadataDataset(datasets); - LockInfoDataset lockInfoDataset = getLockInfoDataset(datasets, main); - Optional metadata = Optional.ofNullable(datasetCaseConverter.applyCaseOnMetadataDataset(metadataDataset, strategy)); - Optional lockInfo = Optional.ofNullable(datasetCaseConverter.applyCaseOnLockInfoDataset(lockInfoDataset, strategy)); + Optional metadata = Optional.ofNullable(datasetCaseConverter.applyCaseOnMetadataDataset(datasets.metadataDataset().orElseThrow(IllegalStateException::new), strategy)); + Optional lockInfo = Optional.ofNullable(datasetCaseConverter.applyCaseOnLockInfoDataset(datasets.lockInfoDataset().orElseThrow(IllegalStateException::new), strategy)); return Datasets.builder() .mainDataset(main) @@ -48,38 +45,4 @@ public Datasets enrichAndApplyCase(Datasets datasets, Function s .lockInfoDataset(lockInfo) .build(); } - - private MetadataDataset getMetadataDataset(Datasets datasets) - { - MetadataDataset metadataset; - if (datasets.metadataDataset().isPresent()) - { - metadataset = datasets.metadataDataset().get(); - } - else - { - metadataset = MetadataDataset.builder().build(); - } - return metadataset; - } - - private LockInfoDataset getLockInfoDataset(Datasets datasets, Dataset main) - { 
- LockInfoDataset lockInfoDataset; - if (datasets.lockInfoDataset().isPresent()) - { - lockInfoDataset = datasets.lockInfoDataset().get(); - } - else - { - String datasetName = main.datasetReference().name().orElseThrow(IllegalStateException::new); - String lockDatasetName = datasetName + LOCK_INFO_DATASET_SUFFIX; - lockInfoDataset = LockInfoDataset.builder() - .database(main.datasetReference().database()) - .group(main.datasetReference().group()) - .name(lockDatasetName) - .build(); - } - return lockInfoDataset; - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java index 30435501213..7232fa61be9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java @@ -19,14 +19,18 @@ import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.IngestModeCaseConverter; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsEnricher; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsCaseConverter; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import 
org.finos.legend.engine.persistence.components.relational.CaseConversion; +import org.finos.legend.engine.persistence.components.util.LockInfoDataset; +import org.finos.legend.engine.persistence.components.util.MetadataDataset; import java.util.List; public class ApiUtils { + private static final String LOCK_INFO_DATASET_SUFFIX = "_legend_persistence_lock"; + public static Dataset deriveMainDatasetFromStaging(Datasets datasets, IngestMode ingestMode) { Dataset mainDataset = datasets.mainDataset(); @@ -40,16 +44,19 @@ public static Dataset deriveMainDatasetFromStaging(Datasets datasets, IngestMode public static Datasets enrichAndApplyCase(Datasets datasets, CaseConversion caseConversion) { - DatasetsEnricher converter = new DatasetsEnricher(); + DatasetsCaseConverter converter = new DatasetsCaseConverter(); + MetadataDataset metadataDataset = getMetadataDataset(datasets); + LockInfoDataset lockInfoDataset = getLockInfoDataset(datasets); + Datasets enrichedDatasets = datasets.withMetadataDataset(metadataDataset).withLockInfoDataset(lockInfoDataset); if (caseConversion == CaseConversion.TO_UPPER) { - return converter.enrichAndApplyCase(datasets, String::toUpperCase); + return converter.applyCase(enrichedDatasets, String::toUpperCase); } if (caseConversion == CaseConversion.TO_LOWER) { - return converter.enrichAndApplyCase(datasets, String::toLowerCase); + return converter.applyCase(enrichedDatasets, String::toLowerCase); } - return datasets; + return enrichedDatasets; } public static IngestMode applyCase(IngestMode ingestMode, CaseConversion caseConversion) @@ -64,4 +71,39 @@ public static IngestMode applyCase(IngestMode ingestMode, CaseConversion caseCon } return ingestMode; } + + private static MetadataDataset getMetadataDataset(Datasets datasets) + { + MetadataDataset metadataset; + if (datasets.metadataDataset().isPresent()) + { + metadataset = datasets.metadataDataset().get(); + } + else + { + metadataset = MetadataDataset.builder().build(); + } + return 
metadataset; + } + + private static LockInfoDataset getLockInfoDataset(Datasets datasets) + { + Dataset main = datasets.mainDataset(); + LockInfoDataset lockInfoDataset; + if (datasets.lockInfoDataset().isPresent()) + { + lockInfoDataset = datasets.lockInfoDataset().get(); + } + else + { + String datasetName = main.datasetReference().name().orElseThrow(IllegalStateException::new); + String lockDatasetName = datasetName + LOCK_INFO_DATASET_SUFFIX; + lockInfoDataset = LockInfoDataset.builder() + .database(main.datasetReference().database()) + .group(main.datasetReference().group()) + .name(lockDatasetName) + .build(); + } + return lockInfoDataset; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index cc702e67b82..93a747bc2e6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -308,7 +308,9 @@ private void initializeLock() { if (enableConcurrentSafety()) { - executor.executePhysicalPlan(generatorResult.initializeLockSqlPlan().orElseThrow(IllegalStateException::new)); + Map placeHolderKeyValues = new HashMap<>(); + placeHolderKeyValues.put(BATCH_START_TS_PATTERN, 
LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER)); + executor.executePhysicalPlan(generatorResult.initializeLockSqlPlan().orElseThrow(IllegalStateException::new), placeHolderKeyValues); } } @@ -316,7 +318,9 @@ private void acquireLock() { if (enableConcurrentSafety()) { - executor.executePhysicalPlan(generatorResult.acquireLockSqlPlan().orElseThrow(IllegalStateException::new)); + Map placeHolderKeyValues = new HashMap<>(); + placeHolderKeyValues.put(BATCH_START_TS_PATTERN, LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER)); + executor.executePhysicalPlan(generatorResult.acquireLockSqlPlan().orElseThrow(IllegalStateException::new), placeHolderKeyValues); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java new file mode 100644 index 00000000000..212f912d7d1 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java @@ -0,0 +1,116 @@ +package org.finos.legend.engine.persistence.components.ingestmode.mixed; + +import org.finos.legend.engine.persistence.components.BaseTest; +import org.finos.legend.engine.persistence.components.TestUtils; +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; +import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import 
org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; +import org.finos.legend.engine.persistence.components.ingestmode.unitemporal.MultiTableIngestionTest; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; +import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; +import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.junit.jupiter.api.Test; + +import java.util.Map; + +import static org.finos.legend.engine.persistence.components.TestUtils.*; + +public class MixedIngestModeTest extends BaseTest { + + private final String basePath = "src/test/resources/data/mixed/"; + + @Test + public void testMultiIngestionTypes() throws Exception { + + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); + + String[] schema = new String[]{idName, nameName, incomeName, startTimeName, expiryDateName, digestName, batchIdInName, batchIdOutName, batchTimeInName, batchTimeOutName}; + + // Create staging table + createStagingTable(stagingTable); + + UnitemporalDelta unitemporalDelta = UnitemporalDelta.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .build(); + + UnitemporalSnapshot unitemporalSnapshot = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .build(); + + Datasets datasets 
= Datasets.of(mainTable, stagingTable); + + // Pass 1 : unitemporalSnapshot + String path = basePath + "input/staging_data_pass1.csv"; + loadBasicStagingData(path); + String expectedPath = basePath + "output/expected_pass1.csv"; + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + + RelationalIngestor ingestor = RelationalIngestor.builder() + .ingestMode(unitemporalSnapshot) + .relationalSink(H2Sink.get()) + .executionTimestampClock(fixedClock_2000_01_01) + .cleanupStagingData(true) + .collectStatistics(true) + .enableSchemaEvolution(false) + .enableConcurrentSafety(true) + .build(); + + IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + MultiTableIngestionTest.verifyResults(1, schema, expectedPath, "main", result, expectedStats); + + // Pass 2 : unitemporalDelta + path = basePath + "input/staging_data_pass2.csv"; + loadBasicStagingData(path); + expectedPath = basePath + "output/expected_pass2.csv"; + expectedStats = createExpectedStatsMap(3, 0, 1, 1, 0); + + ingestor = RelationalIngestor.builder() + .ingestMode(unitemporalDelta) + .relationalSink(H2Sink.get()) + .executionTimestampClock(fixedClock_2000_01_01) + .cleanupStagingData(true) + .collectStatistics(true) + .enableSchemaEvolution(false) + .enableConcurrentSafety(true) + .build(); + + result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + MultiTableIngestionTest.verifyResults(2, schema, expectedPath, "main", result, expectedStats); + + // Pass 3 : unitemporalSnapshot + path = basePath + "input/staging_data_pass3.csv"; + loadBasicStagingData(path); + expectedPath = basePath + "output/expected_pass3.csv"; + expectedStats = createExpectedStatsMap(3, 0, 1, 1, 2); + + ingestor = RelationalIngestor.builder() + .ingestMode(unitemporalSnapshot) + .relationalSink(H2Sink.get()) + .executionTimestampClock(fixedClock_2000_01_01) + .cleanupStagingData(true) + .collectStatistics(true) + .enableSchemaEvolution(false) + 
.enableConcurrentSafety(true) + .build(); + + result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + MultiTableIngestionTest.verifyResults(3, schema, expectedPath, "main", result, expectedStats); + } + +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java index 515fd9ac080..8b3a608bfd3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/MultiTableIngestionTest.java @@ -298,7 +298,7 @@ private void loadStagingDataset2(String path) throws Exception } - private void verifyResults(int batchId, String[] schema, String expectedDataPath, String tableName, IngestorResult result, Map expectedStats) throws IOException + public static void verifyResults(int batchId, String[] schema, String expectedDataPath, String tableName, IngestorResult result, Map expectedStats) throws IOException { Assertions.assertEquals(batchId, result.batchId().get()); Assertions.assertEquals("2000-01-01 00:00:00", result.ingestionTimestampUTC()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass1.csv new file mode 100644 index 00000000000..72cc5edbebc --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass1.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass2.csv new file mode 100644 index 00000000000..0d58c6909b0 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass2.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass3.csv new file mode 100644 index 00000000000..2c9ef5a9268 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/input/staging_data_pass3.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1 +4,MATT,8000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4_UPDATED +5,HENRY,7000,2020-01-07 00:00:00.0,2022-12-07,DIGEST5 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass1.csv new file mode 100644 index 00000000000..5c9aa061073 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass1.csv @@ -0,0 +1,3 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass2.csv new file mode 100644 index 00000000000..2e97278a1ec --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass2.csv @@ -0,0 
+1,5 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass3.csv new file mode 100644 index 00000000000..35b6c066031 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/mixed/output/expected_pass3.csv @@ -0,0 +1,7 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2,ROBERT,4000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2_UPDATED,2,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +4,MATT,8000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4_UPDATED,3,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +5,HENRY,7000,2020-01-07 00:00:00.0,2022-12-07,DIGEST5,3,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 \ No 
newline at end of file From f7129807283701014a56fd58cf44bf8712b8ca57 Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Thu, 17 Aug 2023 09:59:09 +0800 Subject: [PATCH 05/57] Adding tests for concurrent safety --- .../components/planner/Planner.java | 2 +- .../util/LockInfoDatasetAbstract.java | 6 + .../components/util/LockInfoUtils.java | 9 +- .../components/util/LockInfoUtilsTest.java | 7 +- .../api/RelationalIngestorAbstract.java | 10 +- .../persistence/components/BaseTest.java | 6 +- .../ingestmode/mixed/MixedIngestModeTest.java | 20 ++- .../mixed/UnitemporalConcurrentTest.java | 47 ++++++++ .../mixed/UnitemporalDeltaRunner.java | 114 ++++++++++++++++++ 9 files changed, 207 insertions(+), 14 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 5943de07898..be3242e61f0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -159,7 +159,7 @@ public LogicalPlan buildLogicalPlanForInitializeLock(Resources resources) if (options().enableConcurrentSafety()) { LockInfoUtils lockInfoUtils = new LockInfoUtils(datasets.lockInfoDataset().orElseThrow(IllegalStateException::new)); - return LogicalPlan.of(Collections.singleton(lockInfoUtils.initializeLockInfo(BatchStartTimestampAbstract.INSTANCE))); + return LogicalPlan.of(Collections.singleton(lockInfoUtils.initializeLockInfo(mainDataset().datasetReference().name().orElseThrow(IllegalStateException::new), BatchStartTimestampAbstract.INSTANCE))); } return null; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java index 296bf1b3a3c..a06bc9c5ddc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoDatasetAbstract.java @@ -52,6 +52,11 @@ default String lastUsedTimeField() return "last_used_ts_utc"; } + @Value.Default + default String tableNameField() + { + return "table_name"; + } @Value.Derived default Dataset get() @@ -63,6 +68,7 @@ default Dataset get() 
.schema(SchemaDefinition.builder() .addFields(Field.builder().name(insertTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) .addFields(Field.builder().name(lastUsedTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) + .addFields(Field.builder().name(tableNameField()).type(FieldType.of(DataType.VARCHAR, Optional.empty(), Optional.empty())).unique(true).build()) .build()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java index a4aafdf3b39..6ff168070e5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LockInfoUtils.java @@ -39,12 +39,13 @@ public LockInfoUtils(LockInfoDataset lockInfoDataset) this.dataset = lockInfoDataset.get(); } - public Insert initializeLockInfo(BatchStartTimestamp batchStartTimestamp) + public Insert initializeLockInfo(String tableName, BatchStartTimestamp batchStartTimestamp) { DatasetReference metaTableRef = this.dataset.datasetReference(); FieldValue insertTimeField = FieldValue.builder().datasetRef(metaTableRef).fieldName(lockInfoDataset.insertTimeField()).build(); - List insertFields = Arrays.asList(insertTimeField); - List selectFields = Arrays.asList(batchStartTimestamp); + FieldValue tableNameField = 
FieldValue.builder().datasetRef(metaTableRef).fieldName(lockInfoDataset.tableNameField()).build(); + List insertFields = Arrays.asList(insertTimeField, tableNameField); + List selectFields = Arrays.asList(batchStartTimestamp, StringValue.of(tableName)); Condition condition = Not.of(Exists.of(Selection.builder().addFields(All.INSTANCE).source(dataset).build())); return Insert.of(dataset, Selection.builder().addAllFields(selectFields).condition(condition).build(), insertFields); } @@ -52,7 +53,7 @@ public Insert initializeLockInfo(BatchStartTimestamp batchStartTimestamp) public Update updateLockInfo(BatchStartTimestamp batchStartTimestamp) { List> keyValuePairs = new ArrayList<>(); - keyValuePairs.add(Pair.of(FieldValue.builder().datasetRef(dataset.datasetReference()).fieldName(lockInfoDataset.insertTimeField()).build(), batchStartTimestamp)); + keyValuePairs.add(Pair.of(FieldValue.builder().datasetRef(dataset.datasetReference()).fieldName(lockInfoDataset.lastUsedTimeField()).build(), batchStartTimestamp)); Update update = Update.builder().dataset(dataset).addAllKeyValuePairs(keyValuePairs).build(); return update; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java index c79fd8b5616..23007edbd49 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/LockInfoUtilsTest.java @@ -43,12 +43,13 @@ public class LockInfoUtilsTest public void testInitializeLockInfo() { LockInfoUtils store = new LockInfoUtils(lockInfoDataset); - Insert operation = store.initializeLockInfo(BatchStartTimestamp.INSTANCE); + Insert operation = store.initializeLockInfo("main", BatchStartTimestamp.INSTANCE); RelationalTransformer transformer = new RelationalTransformer(AnsiSqlSink.get(), transformOptions); LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = "INSERT INTO main_table_lock (\"insert_ts_utc\") (SELECT '2000-01-01 00:00:00' WHERE NOT (EXISTS (SELECT * FROM main_table_lock as main_table_lock)))"; + String expectedSql = "INSERT INTO main_table_lock (\"insert_ts_utc\", \"table_name\") " + + "(SELECT '2000-01-01 00:00:00','main' WHERE NOT (EXISTS (SELECT * FROM main_table_lock as main_table_lock)))"; Assertions.assertEquals(expectedSql, list.get(0)); } @@ -61,7 +62,7 @@ public void testUpdateMetaStore() LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = "UPDATE main_table_lock as main_table_lock SET main_table_lock.\"insert_ts_utc\" = '2000-01-01 00:00:00'"; + String expectedSql = "UPDATE main_table_lock as main_table_lock SET main_table_lock.\"last_used_ts_utc\" = '2000-01-01 00:00:00'"; Assertions.assertEquals(expectedSql, list.get(0)); } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 93a747bc2e6..9a7a2a0141b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -310,7 +310,15 @@ private void initializeLock() { Map placeHolderKeyValues = new HashMap<>(); placeHolderKeyValues.put(BATCH_START_TS_PATTERN, LocalDateTime.now(executionTimestampClock()).format(DATE_TIME_FORMATTER)); - executor.executePhysicalPlan(generatorResult.initializeLockSqlPlan().orElseThrow(IllegalStateException::new), placeHolderKeyValues); + try + { + executor.executePhysicalPlan(generatorResult.initializeLockSqlPlan().orElseThrow(IllegalStateException::new), placeHolderKeyValues); + } + catch (Exception e) + { + // Ignore this exception + // In race condition: multiple jobs will try to insert same row + } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index 507c99b8148..f4cda410aac 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -57,10 +57,10 @@ public class BaseTest { public static final String TEST_SCHEMA = "TEST"; public static final String TEST_DATABASE = "TEST_DB"; - private static final String H2_JDBC_URL = "jdbc:h2:mem:" + TEST_DATABASE + + protected static final String H2_JDBC_URL = "jdbc:h2:mem:" + TEST_DATABASE + ";DATABASE_TO_UPPER=false;mode=mysql;LOCK_TIMEOUT=10000;BUILTIN_ALIAS_OVERRIDE=TRUE"; - private static final String H2_USER_NAME = "sa"; - private static final String H2_PASSWORD = ""; + protected static final String H2_USER_NAME = "sa"; + protected static final String H2_PASSWORD = ""; public static JdbcHelper h2Sink; protected final ZonedDateTime fixedExecutionZonedDateTime1 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java index 212f912d7d1..ef070aaa5b0 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/MixedIngestModeTest.java @@ -1,3 +1,17 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package org.finos.legend.engine.persistence.components.ingestmode.mixed; import org.finos.legend.engine.persistence.components.BaseTest; @@ -18,12 +32,14 @@ import static org.finos.legend.engine.persistence.components.TestUtils.*; -public class MixedIngestModeTest extends BaseTest { +public class MixedIngestModeTest extends BaseTest +{ private final String basePath = "src/test/resources/data/mixed/"; @Test - public void testMultiIngestionTypes() throws Exception { + public void testMultiIngestionTypes() throws Exception + { DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java new file mode 100644 index 00000000000..fe1a61dcb85 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java @@ -0,0 +1,47 @@ +package org.finos.legend.engine.persistence.components.ingestmode.mixed; + +import org.finos.legend.engine.persistence.components.BaseTest; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +public class UnitemporalConcurrentTest extends BaseTest +{ + @Test + public void test() throws InterruptedException, IOException + { + + AtomicInteger maxBatchIdCounter = new 
AtomicInteger(); + maxBatchIdCounter.set(0); + + // Thread 1 + String path1 = "src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/without_delete_ind/staging_data_pass1.csv"; + Runnable r1 = new UnitemporalDeltaRunner(path1, "_thread1", H2_USER_NAME, H2_PASSWORD, H2_JDBC_URL, fixedClock_2000_01_01, maxBatchIdCounter); + Thread t1 = new Thread(r1); + t1.start(); + + // Thread 2 + String path2 = "src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/without_delete_ind/staging_data_pass2.csv"; + Runnable r2 = new UnitemporalDeltaRunner(path2, "_thread2", H2_USER_NAME, H2_PASSWORD, H2_JDBC_URL, fixedClock_2000_01_01, maxBatchIdCounter); + Thread t2 = new Thread(r2); + t2.start(); + + // Thread 2 + String path3 = "src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/without_delete_ind/staging_data_pass3.csv"; + Runnable r3 = new UnitemporalDeltaRunner(path3, "_thread3", H2_USER_NAME, H2_PASSWORD, H2_JDBC_URL, fixedClock_2000_01_01, maxBatchIdCounter); + Thread t3 = new Thread(r3); + t3.start(); + + // Sleep for a while for tests to finish + Thread.sleep(5000); + + List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", "main")); + Assertions.assertEquals(5, tableData.size()); + Assertions.assertEquals(3, maxBatchIdCounter.get()); + } + +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java new file mode 100644 index 00000000000..a2980054c18 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java @@ -0,0 +1,114 @@ +package org.finos.legend.engine.persistence.components.ingestmode.mixed; + +import org.finos.legend.engine.persistence.components.TestUtils; +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; +import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalDelta; +import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.relational.SqlPlan; +import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; +import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; +import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcConnection; +import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper; +import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; + +import java.time.Clock; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.finos.legend.engine.persistence.components.TestUtils.*; + +public class UnitemporalDeltaRunner implements Runnable +{ + private String stagingSuffix; + private Clock clock; + private AtomicInteger maxBatchIdCounter; + private String dataPath; + private JdbcHelper h2Sink; + DatasetDefinition mainTable = 
TestUtils.getDefaultMainTable(); + + IngestMode getIngestMode() + { + UnitemporalDelta ingestMode = UnitemporalDelta.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .build(); + return ingestMode; + } + + public UnitemporalDeltaRunner(String dataPath, String stagingSuffix, String h2User, String h2Pwd, String h2JdbcUrl, Clock clock, AtomicInteger maxBatchIdCounter) + { + this.dataPath = dataPath; + this.stagingSuffix = stagingSuffix; + this.clock = clock; + this.maxBatchIdCounter = maxBatchIdCounter; + this.h2Sink = JdbcHelper.of(H2Sink.createConnection(h2User, h2Pwd, h2JdbcUrl)); + } + + @Override + public void run() + { + try + { + DatasetDefinition stagingTable = DatasetDefinition.builder() + .group(testSchemaName) + .name(stagingTableName + stagingSuffix) + .schema(getStagingSchema()) + .build(); + + createStagingTable(stagingTable); + loadBasicStagingData(dataPath, stagingTableName + stagingSuffix); + Datasets datasets = Datasets.of(mainTable, stagingTable); + RelationalIngestor ingestor = RelationalIngestor.builder() + .ingestMode(getIngestMode()) + .relationalSink(H2Sink.get()) + .cleanupStagingData(true) + .collectStatistics(true) + .enableConcurrentSafety(true) + .executionTimestampClock(clock) + .build(); + + IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + System.out.println(String.format("%s : BatchId : %s", Thread.currentThread().getName() , result.batchId())); + System.out.println(String.format("%s : stats : %s", Thread.currentThread().getName() , result.statisticByName())); + + if (maxBatchIdCounter.get() < result.batchId().get()) + { + maxBatchIdCounter.set(result.batchId().get()); + } + } + catch (Exception e) + { + throw new RuntimeException(e); + } + finally + { + h2Sink.close(); + } + } + + 
protected void loadBasicStagingData(String path, String tableName) throws Exception + { + String loadSql = String.format("TRUNCATE TABLE \"TEST\".\"%s\";", tableName) + + String.format("INSERT INTO \"TEST\".\"%s\"(id, name, income, start_time ,expiry_date, digest) ", tableName) + + "SELECT CONVERT( \"id\",INT ), \"name\", CONVERT( \"income\", BIGINT), CONVERT( \"start_time\", DATETIME), CONVERT( \"expiry_date\", DATE), digest" + + " FROM CSVREAD( '" + path + "', 'id, name, income, start_time, expiry_date, digest', NULL )"; + h2Sink.executeStatement(loadSql); + } + + protected void createStagingTable(DatasetDefinition stagingTable) throws Exception + { + RelationalTransformer transformer = new RelationalTransformer(H2Sink.get()); + LogicalPlan tableCreationPlan = LogicalPlanFactory.getDatasetCreationPlan(stagingTable, true); + SqlPlan tableCreationPhysicalPlan = transformer.generatePhysicalPlan(tableCreationPlan); + h2Sink.executeStatements(tableCreationPhysicalPlan.getSqlList()); + } +} \ No newline at end of file From 8973f916388ab10d1600e84664a50dac5cfc5bab Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Thu, 17 Aug 2023 11:06:13 +0800 Subject: [PATCH 06/57] Code Clean up --- .../mixed/UnitemporalConcurrentTest.java | 14 ++++++++++++++ .../mixed/UnitemporalDeltaRunner.java | 17 ++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java index fe1a61dcb85..3a8ae2b9291 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java @@ -1,3 +1,17 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package org.finos.legend.engine.persistence.components.ingestmode.mixed; import org.finos.legend.engine.persistence.components.BaseTest; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java index a2980054c18..28c7995ade2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalDeltaRunner.java @@ -1,3 +1,17 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package org.finos.legend.engine.persistence.components.ingestmode.mixed; import org.finos.legend.engine.persistence.components.TestUtils; @@ -77,9 +91,6 @@ public void run() .build(); IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); - System.out.println(String.format("%s : BatchId : %s", Thread.currentThread().getName() , result.batchId())); - System.out.println(String.format("%s : stats : %s", Thread.currentThread().getName() , result.statisticByName())); - if (maxBatchIdCounter.get() < result.batchId().get()) { maxBatchIdCounter.set(result.batchId().get()); From 97d93c9167b150d0a742f3d4d17d09937bbe777c Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Fri, 18 Aug 2023 14:25:43 +0800 Subject: [PATCH 07/57] Bug Fix: Bitemporal temp tables must be deleted after usage --- .../planner/BitemporalDeltaPlanner.java | 22 +++++++++++- .../components/planner/Planner.java | 5 +++ .../components/util/LogicalPlanUtils.java | 6 ++-- .../components/AnsiTestArtifacts.java | 25 +++++++++++++ ...eltaSourceSpecifiesFromAndThroughTest.java | 10 ++++++ ...itemporalDeltaSourceSpecifiesFromTest.java | 35 ++++++++++++++++++- .../nontemporal/AppendOnlyTest.java | 9 +++++ .../nontemporal/NontemporalDeltaTest.java | 10 ++++++ .../nontemporal/NontemporalSnapshotTest.java | 9 +++++ .../UnitemporalDeltaBatchIdBasedTest.java | 13 +++++++ .../UnitemporalSnapshotBatchIdBasedTest.java | 10 ++++++ .../api/GeneratorResultAbstract.java | 7 ++++ .../api/RelationalGeneratorAbstract.java | 8 +++++ .../api/RelationalIngestorAbstract.java | 23 ++++++++++++ .../persistence/components/BaseTest.java | 1 + .../mixed/UnitemporalConcurrentTest.java | 4 ++- .../nontemporal/AppendOnlyTest.java | 3 +- ...ourceSpecifiesFromAndThroughTestCases.java | 1 + ...oralDeltaSourceSpecifiesFromTestCases.java | 1 + .../nontemporal/AppendOnlyTestCases.java | 4 +++ .../NontemporalDeltaTestCases.java | 2 ++ .../NontemporalSnapshotTestCases.java | 2 ++ 
...nitmemporalDeltaBatchIdBasedTestCases.java | 2 ++ ...memporalSnapshotBatchIdBasedTestCases.java | 1 + 24 files changed, 207 insertions(+), 6 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java index 99838078718..e32437d2795 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java @@ -46,6 +46,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import org.finos.legend.engine.persistence.components.logicalplan.operations.Update; import org.finos.legend.engine.persistence.components.logicalplan.operations.UpdateAbstract; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.values.Case; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; @@ -57,7 +58,6 @@ import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; -import java.util.Arrays; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -89,6 +89,7 @@ class 
BitemporalDeltaPlanner extends BitemporalPlanner private Dataset stagingDataset; private Dataset tempDataset; private Dataset tempDatasetWithDeleteIndicator; + private Optional stagingDatasetWithoutDuplicates; private FieldValue sourceValidDatetimeFrom; private FieldValue targetValidDatetimeFrom; @@ -111,6 +112,7 @@ class BitemporalDeltaPlanner extends BitemporalPlanner if (ingestMode().validityMilestoning().validityDerivation() instanceof SourceSpecifiesFromDateTime && ingestMode().deduplicationStrategy() instanceof FilterDuplicates) { this.stagingDataset = getStagingDatasetWithoutDuplicates(datasets); + this.stagingDatasetWithoutDuplicates = Optional.of(this.stagingDataset); } else { @@ -336,6 +338,24 @@ protected Selection getRowsUpdated(String alias) return getRowsUpdated(alias, getPrimaryKeyFieldsAndFromFieldFromMain(), sink2); } + public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) + { + List operations = new ArrayList<>(); + if (ingestMode().validityMilestoning().validityDerivation() instanceof SourceSpecifiesFromDateTime) + { + operations.add(Drop.of(true, tempDataset, true)); + if (deleteIndicatorField.isPresent()) + { + operations.add(Drop.of(true, tempDatasetWithDeleteIndicator, true)); + } + if (ingestMode().deduplicationStrategy() instanceof FilterDuplicates && stagingDatasetWithoutDuplicates.isPresent()) + { + operations.add(Drop.of(true, stagingDatasetWithoutDuplicates.get(), true)); + } + } + return LogicalPlan.of(operations); + } + public Optional getDataSplitInRangeConditionForStatistics() { return ingestMode().dataSplitField().map(field -> LogicalPlanUtils.getDataSplitInRangeCondition(stagingDataset(), field)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index be3242e61f0..1ec5ff31c1e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -191,6 +191,11 @@ else if (plannerOptions.cleanupStagingData()) return LogicalPlan.of(operations); } + public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) + { + return null; + } + public Map buildLogicalPlanForPreRunStatistics(Resources resources) { return Collections.emptyMap(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index 0631fe882cf..074ddf70e26 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -404,11 +404,12 @@ public static List extractStagedFilesFieldValues(Dataset dataset) public static Dataset getTempDataset(Datasets 
datasets) { + String mainDatasetName = datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); return datasets.tempDataset().orElse(DatasetDefinition.builder() .schema(datasets.mainDataset().schema()) .database(datasets.mainDataset().datasetReference().database()) .group(datasets.mainDataset().datasetReference().group()) - .name(LogicalPlanUtils.generateTableNameWithSuffix(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)), TEMP_DATASET_BASE_NAME)) + .name(mainDatasetName + UNDERSCORE + TEMP_DATASET_BASE_NAME) .alias(TEMP_DATASET_BASE_NAME) .build()); } @@ -421,6 +422,7 @@ public static Dataset getTempDatasetWithDeleteIndicator(Datasets datasets, Strin } else { + String mainDatasetName = datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)); Field deleteIndicator = Field.builder().name(deleteIndicatorField).type(FieldType.of(DataType.BOOLEAN, Optional.empty(), Optional.empty())).build(); List mainFieldsPlusDeleteIndicator = new ArrayList<>(datasets.mainDataset().schema().fields()); mainFieldsPlusDeleteIndicator.add(deleteIndicator); @@ -428,7 +430,7 @@ public static Dataset getTempDatasetWithDeleteIndicator(Datasets datasets, Strin .schema(datasets.mainDataset().schema().withFields(mainFieldsPlusDeleteIndicator)) .database(datasets.mainDataset().datasetReference().database()) .group(datasets.mainDataset().datasetReference().group()) - .name(LogicalPlanUtils.generateTableNameWithSuffix(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)), TEMP_DATASET_WITH_DELETE_INDICATOR_BASE_NAME)) + .name(mainDatasetName + UNDERSCORE + TEMP_DATASET_WITH_DELETE_INDICATOR_BASE_NAME) .alias(TEMP_DATASET_WITH_DELETE_INDICATOR_BASE_NAME) .build(); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java index 5491918fd2b..eab7b769251 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/AnsiTestArtifacts.java @@ -141,6 +141,31 @@ public class AnsiTestArtifacts public static String expectedBaseStagingTableCreateQueryWithNoPKs = "CREATE TABLE IF NOT EXISTS \"mydb\".\"staging\"(" + "\"id\" INTEGER,\"name\" VARCHAR,\"amount\" DOUBLE,\"biz_date\" DATE,\"digest\" VARCHAR)"; + public static String expectedLockInfoTableCreateQuery = "CREATE TABLE IF NOT EXISTS \"mydb\".\"main_legend_persistence_lock\"" + + "(\"insert_ts_utc\" DATETIME,\"last_used_ts_utc\" DATETIME,\"table_name\" VARCHAR UNIQUE)"; + + public static String expectedLockInfoTableUpperCaseCreateQuery = "CREATE TABLE IF NOT EXISTS \"MYDB\".\"MAIN_LEGEND_PERSISTENCE_LOCK\"" + + "(\"INSERT_TS_UTC\" DATETIME,\"LAST_USED_TS_UTC\" DATETIME,\"TABLE_NAME\" VARCHAR UNIQUE)"; + + public static String lockInitializedQuery = "INSERT INTO \"mydb\".\"main_legend_persistence_lock\" " + + "(\"insert_ts_utc\", \"table_name\") " + + "(SELECT '2000-01-01 00:00:00','main' " + + "WHERE NOT (EXISTS (SELECT * FROM \"mydb\".\"main_legend_persistence_lock\" as main_legend_persistence_lock)))"; + + public static String lockInitializedUpperCaseQuery = "INSERT INTO 
\"MYDB\".\"MAIN_LEGEND_PERSISTENCE_LOCK\" (\"INSERT_TS_UTC\", \"TABLE_NAME\")" + + " (SELECT '2000-01-01 00:00:00','MAIN' WHERE NOT (EXISTS (SELECT * FROM \"MYDB\".\"MAIN_LEGEND_PERSISTENCE_LOCK\" as MAIN_LEGEND_PERSISTENCE_LOCK)))"; + + public static String lockAcquiredQuery = "UPDATE \"mydb\".\"main_legend_persistence_lock\" as main_legend_persistence_lock " + + "SET main_legend_persistence_lock.\"last_used_ts_utc\" = '2000-01-01 00:00:00'"; + + public static String lockAcquiredUpperCaseQuery = "UPDATE \"MYDB\".\"MAIN_LEGEND_PERSISTENCE_LOCK\" as MAIN_LEGEND_PERSISTENCE_LOCK " + + "SET MAIN_LEGEND_PERSISTENCE_LOCK.\"LAST_USED_TS_UTC\" = '2000-01-01 00:00:00'"; + + public static String getDropTempTableQuery(String tableName) + { + return String.format("DROP TABLE IF EXISTS %s CASCADE", tableName); + } + public static String expectedBaseTableCreateQueryWithAuditAndNoPKs = "CREATE TABLE IF NOT EXISTS \"mydb\".\"main\"" + "(\"id\" INTEGER,\"name\" VARCHAR,\"amount\" DOUBLE,\"biz_date\" DATE,\"digest\" VARCHAR,\"batch_update_time\" DATETIME)"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java index 39c8f6e92cd..ef02b5b78d1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTest.java @@ -24,6 +24,9 @@ import java.util.List; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; + public class BitemporalDeltaSourceSpecifiesFromAndThroughTest extends BitemporalDeltaSourceSpecifiesFromAndThroughTestCases { @Override @@ -32,6 +35,9 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(GeneratorRe List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); + List postCleanupSql = operations.postCleanupSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -53,10 +59,14 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(GeneratorRe Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalMainTableCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalStagingTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(2)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSql.get(3)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); 
Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); + Assertions.assertEquals(0, postCleanupSql.size()); String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java index 9a694134eb6..777c8bb3a4f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTest.java @@ -24,7 +24,8 @@ import java.util.List; -import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.expectedMetadataTableIngestQueryWithPlaceHolders; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; 
+import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.getDropTempTableQuery; public class BitemporalDeltaSourceSpecifiesFromTest extends BitemporalDeltaSourceSpecifiesFromTestCases { @@ -41,6 +42,9 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(GeneratorRe List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); + List postCleanupSql = operations.postCleanupSql(); String expectedStageToTemp = "INSERT INTO \"mydb\".\"temp\" " + "(\"id\", \"name\", \"amount\", \"digest\", \"validity_from_target\", \"validity_through_target\", \"batch_id_in\", \"batch_id_out\") " + @@ -99,6 +103,7 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(GeneratorRe Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyStagingTableCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(2)); Assertions.assertEquals(AnsiTestArtifacts.expectedBitemporalFromOnlyTempTableCreateQuery, preActionsSql.get(3)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSql.get(4)); Assertions.assertEquals(expectedStageToTemp, milestoningSql.get(0)); Assertions.assertEquals(expectedMainToTemp, milestoningSql.get(1)); @@ -106,6 +111,10 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(GeneratorRe Assertions.assertEquals(expectedTempToMain, milestoningSql.get(3)); Assertions.assertEquals(getExpectedCleanupSql("\"mydb\".\"temp\"", "temp"), milestoningSql.get(4)); + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); + + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"temp\""), postCleanupSql.get(0)); 
Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -185,6 +194,8 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; verifyStats(operations.get(0), enrichSqlWithDataSplits(incomingRecordCount,dataSplitRanges.get(0)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); verifyStats(operations.get(1), enrichSqlWithDataSplits(incomingRecordCount,dataSplitRanges.get(1)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); @@ -299,6 +310,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplits(Generator Assertions.assertEquals(getExpectedCleanupSql("\"mydb\".\"temp\"", "temp"), milestoningSql.get(7)); Assertions.assertEquals(getExpectedCleanupSql("\"mydb\".\"tempWithDeleteIndicator\"", "tempWithDeleteIndicator"), milestoningSql.get(8)); + System.out.println(operations.postCleanupSql()); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"temp\""), operations.postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"tempWithDeleteIndicator\""), operations.postCleanupSql().get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM 
batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as \"rowsInserted\""; @@ -449,6 +464,9 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplits(List= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_in\" = (SELECT 
COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as \"rowsInserted\""; @@ -535,6 +553,9 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplitsFilterDuplic Assertions.assertEquals(getExpectedCleanupSql("\"mydb\".\"temp\"", "temp"), milestoningSql.get(5)); Assertions.assertEquals(getExpectedCleanupSql("\"mydb\".\"stagingWithoutDuplicates\"", "stage"), milestoningSql.get(6)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"temp\""), operations.postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"stagingWithoutDuplicates\""), operations.postCleanupSql().get(1)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } @@ -623,6 +644,9 @@ public void verifyBitemporalDeltaBatchIdBasedNoDeleteIndWithDataSplitsFilterDupl Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(0).metadataIngestSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"temp\""), operations.get(0).postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"stagingWithoutDuplicates\""), operations.get(0).postCleanupSql().get(1)); + 
Assertions.assertEquals(2, operations.size()); String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; verifyStats(operations.get(0), enrichSqlWithDataSplits(incomingRecordCount,dataSplitRanges.get(0)), rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); @@ -747,6 +771,11 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndNoDataSplitsFilterDupl Assertions.assertEquals(getExpectedCleanupSql("\"mydb\".\"tempWithDeleteIndicator\"", "tempWithDeleteIndicator"), milestoningSql.get(9)); Assertions.assertEquals(getExpectedCleanupSql("\"mydb\".\"stagingWithoutDuplicates\"", "stage"), milestoningSql.get(10)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"temp\""), operations.postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"tempWithDeleteIndicator\""), operations.postCleanupSql().get(1)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"stagingWithoutDuplicates\""), operations.postCleanupSql().get(2)); + + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; String rowsInserted = "SELECT (SELECT COUNT(*) FROM \"mydb\".\"main\" as 
sink WHERE sink.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))-(SELECT COUNT(*) FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))) as \"rowsInserted\""; @@ -918,6 +947,10 @@ public void verifyBitemporalDeltaBatchIdBasedWithDeleteIndWithDataSplitsFilterDu Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), operations.get(0).metadataIngestSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"main_legend_persistence_temp\""), operations.get(0).postCleanupSql().get(0)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"main_legend_persistence_tempWithDeleteIndicator\""), operations.get(0).postCleanupSql().get(1)); + Assertions.assertEquals(getDropTempTableQuery("\"mydb\".\"staging_legend_persistence_stageWithoutDuplicates\""), operations.get(0).postCleanupSql().get(2)); + Assertions.assertEquals(2, operations.size()); String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage WHERE (stage.\"data_split\" >= '{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}') AND (stage.\"data_split\" <= '{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}')"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE (sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as 
batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1) AND (EXISTS (SELECT * FROM \"mydb\".\"main\" as sink2 WHERE ((sink2.\"id\" = sink.\"id\") AND (sink2.\"name\" = sink.\"name\") AND (sink2.\"validity_from_target\" = sink.\"validity_from_target\")) AND (sink2.\"batch_id_in\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN'))))"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 7612a3a8046..69799b67769 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -26,6 +26,9 @@ import java.util.ArrayList; import java.util.List; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; + public class AppendOnlyTest extends AppendOnlyTestCases { String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; @@ -39,13 +42,19 @@ public void verifyAppendOnlyAllowDuplicatesNoAuditing(GeneratorResult operations { List preActionsSqlList = 
operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\", \"digest\") " + "(SELECT * FROM \"mydb\".\"staging\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQueryWithNoPKs, preActionsSqlList.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedBaseStagingTableCreateQueryWithNoPKs, preActionsSqlList.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSqlList.get(2)); Assertions.assertEquals(insertSql, milestoningSqlList.get(0)); + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); + // Stats verifyStats(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java index ed11f86cd4c..948a3132866 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalDeltaTest.java @@ -26,6 +26,9 @@ import java.util.ArrayList; import java.util.List; 
+import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; + public class NontemporalDeltaTest extends NontemporalDeltaTestCases { protected String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; @@ -40,6 +43,8 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); String updateSql = "UPDATE \"mydb\".\"main\" as sink SET " + "sink.\"id\" = (SELECT stage.\"id\" FROM \"mydb\".\"staging\" as stage WHERE ((sink.\"id\" = stage.\"id\") AND (sink.\"name\" = stage.\"name\")) AND (sink.\"digest\" <> stage.\"digest\"))," + @@ -58,9 +63,14 @@ public void verifyNontemporalDeltaNoAuditingNoDataSplit(GeneratorResult operatio Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTablePlusDigestCreateQuery, preActionsSqlList.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedStagingTableWithDigestCreateQuery, preActionsSqlList.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSqlList.get(2)); + Assertions.assertEquals(updateSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); + // Stats Assertions.assertEquals(incomingRecordCount, operations.postIngestStatisticsSql().get(StatisticName.INCOMING_RECORD_COUNT)); Assertions.assertEquals(rowsTerminated, operations.postIngestStatisticsSql().get(StatisticName.ROWS_TERMINATED)); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java index 41ad41f8c3b..3771c827314 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/NontemporalSnapshotTest.java @@ -26,6 +26,9 @@ import java.util.ArrayList; import java.util.List; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; + public class NontemporalSnapshotTest extends NontemporalSnapshotTestCases { String cleanUpMainTableSql = "DELETE FROM \"mydb\".\"main\" as sink"; @@ -41,14 +44,20 @@ public void verifyNontemporalSnapshotNoAuditingNoDataSplit(GeneratorResult opera { List preActionsSqlList = operations.preActionsSql(); List milestoningSqlList = operations.ingestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); String insertSql = "INSERT INTO \"mydb\".\"main\" (\"id\", \"name\", \"amount\", \"biz_date\") " + "(SELECT * FROM \"mydb\".\"staging\" as stage)"; Assertions.assertEquals(AnsiTestArtifacts.expectedBaseTableCreateQuery, preActionsSqlList.get(0)); 
Assertions.assertEquals(AnsiTestArtifacts.expectedBaseStagingTableCreateQuery, preActionsSqlList.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSqlList.get(2)); + Assertions.assertEquals(cleanUpMainTableSql, milestoningSqlList.get(0)); Assertions.assertEquals(insertSql, milestoningSqlList.get(1)); + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); // Stats verifyStats(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java index 136df863c99..8470cdbeb0f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalDeltaBatchIdBasedTest.java @@ -24,6 +24,8 @@ import java.util.List; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.*; + public class UnitemporalDeltaBatchIdBasedTest extends UnitmemporalDeltaBatchIdBasedTestCases { @Override @@ -32,6 +34,8 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio List preActionsSql = operations.preActionsSql(); List milestoningSql = 
operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -53,10 +57,13 @@ public void verifyUnitemporalDeltaNoDeleteIndNoAuditing(GeneratorResult operatio Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedStagingTableWithDigestCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(2)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSql.get(3)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; String rowsUpdated = "SELECT COUNT(*) as \"rowsUpdated\" FROM \"mydb\".\"main\" as sink WHERE sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1"; @@ -194,6 +201,8 @@ public void verifyUnitemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List initializeLockSql = 
operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); String expectedMilestoneQuery = "UPDATE \"MYDB\".\"MAIN\" as sink " + "SET sink.\"BATCH_ID_OUT\" = (SELECT COALESCE(MAX(BATCH_METADATA.\"TABLE_BATCH_ID\"),0)+1 " + @@ -214,9 +223,13 @@ public void verifyUnitemporalDeltaWithUpperCaseOptimizer(GeneratorResult operati Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQueryWithUpperCase, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQueryWithUpperCase(), preActionsSql.get(1)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableUpperCaseCreateQuery, preActionsSql.get(2)); + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableIngestQueryWithUpperCase(), metadataIngestSql.get(0)); + Assertions.assertEquals(lockInitializedUpperCaseQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredUpperCaseQuery, acquireLockSql.get(0)); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java index 9033d4a5dc9..5abcbe0e34b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java @@ -23,6 +23,9 @@ import java.util.List; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockAcquiredQuery; +import static org.finos.legend.engine.persistence.components.AnsiTestArtifacts.lockInitializedQuery; + public class UnitemporalSnapshotBatchIdBasedTest extends UnitmemporalSnapshotBatchIdBasedTestCases { String incomingRecordCount = "SELECT COUNT(*) as \"incomingRecordCount\" FROM \"mydb\".\"staging\" as stage"; @@ -37,6 +40,8 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); + List initializeLockSql = operations.initializeLockSql(); + List acquireLockSql = operations.acquireLockSql(); String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + @@ -54,10 +59,15 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(AnsiTestArtifacts.expectedStagingTableWithDigestCreateQuery, preActionsSql.get(1)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(2)); + Assertions.assertEquals(AnsiTestArtifacts.expectedLockInfoTableCreateQuery, preActionsSql.get(3)); Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); Assertions.assertEquals(expectedUpsertQuery, milestoningSql.get(1)); 
Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + + Assertions.assertEquals(lockInitializedQuery, initializeLockSql.get(0)); + Assertions.assertEquals(lockAcquiredQuery, acquireLockSql.get(0)); + verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java index 847ea4b7f15..3cfc890a74f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/GeneratorResultAbstract.java @@ -58,6 +58,8 @@ public abstract class GeneratorResultAbstract public abstract SqlPlan postActionsSqlPlan(); + public abstract Optional postCleanupSqlPlan(); + public abstract Map preIngestStatisticsSqlPlan(); public abstract Map postIngestStatisticsSqlPlan(); @@ -102,6 +104,11 @@ public List postActionsSql() return postActionsSqlPlan().getSqlList(); } + public List postCleanupSql() + { + return postCleanupSqlPlan().map(SqlPlanAbstract::getSqlList).orElse(Collections.emptyList()); + } + public Map preIngestStatisticsSql() { return preIngestStatisticsSqlPlan().keySet().stream() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index df4bafa3df2..b33305e76fd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -243,6 +243,13 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann LogicalPlan postActionsLogicalPlan = planner.buildLogicalPlanForPostActions(resources); SqlPlan postActionsSqlPlan = transformer.generatePhysicalPlan(postActionsLogicalPlan); + LogicalPlan postCleanupLogicalPlan = planner.buildLogicalPlanForPostCleanup(resources); + Optional postCleanupSqlPlan = Optional.empty(); + if (postCleanupLogicalPlan != null) + { + postCleanupSqlPlan = Optional.of(transformer.generatePhysicalPlan(postCleanupLogicalPlan)); + } + // post-run statistics Map postIngestStatisticsLogicalPlan = planner.buildLogicalPlanForPostRunStatistics(resources); Map postIngestStatisticsSqlPlan = new HashMap<>(); @@ -259,6 +266,7 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann .schemaEvolutionDataset(schemaEvolutionDataset) .ingestSqlPlan(ingestSqlPlan) .postActionsSqlPlan(postActionsSqlPlan) + .postCleanupSqlPlan(postCleanupSqlPlan) 
.metadataIngestSqlPlan(metaDataIngestSqlPlan) .putAllPreIngestStatisticsSqlPlan(preIngestStatisticsSqlPlan) .putAllPostIngestStatisticsSqlPlan(postIngestStatisticsSqlPlan) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 9a7a2a0141b..3344a247fdc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -238,6 +238,16 @@ public IngestorResult ingest(Datasets datasets) return ingest(Arrays.asList()).stream().findFirst().orElseThrow(IllegalStateException::new); } + /* + - Perform cleanup of temporary tables + */ + public Datasets cleanUp(Datasets datasets) + { + init(datasets); + postCleanup(); + return this.enrichedDatasets; + } + /* Perform full ingestion from Staging to Target table based on the Ingest mode Full Ingestion covers: @@ -245,6 +255,7 @@ public IngestorResult ingest(Datasets datasets) 2. Create tables 3. Evolves Schema 4. Ingestion from staging to main dataset in a transaction + 5. 
Clean up of temporary tables */ public IngestorResult performFullIngestion(RelationalConnection connection, Datasets datasets) { @@ -332,6 +343,14 @@ private void acquireLock() } } + private void postCleanup() + { + if (generatorResult.postCleanupSqlPlan().isPresent()) + { + executor.executePhysicalPlan(generatorResult.postCleanupSqlPlan().get()); + } + } + private List ingest(List dataSplitRanges) { if (enrichedIngestMode instanceof BulkLoad) @@ -377,6 +396,10 @@ private List performFullIngestion(RelationalConnection connectio { executor.close(); } + + // post Cleanup + postCleanup(); + return result; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java index f4cda410aac..2dcd54229ea 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/BaseTest.java @@ -170,6 +170,7 @@ protected IngestorResult executePlansAndVerifyResults(IngestMode ingestMode, Pla .collectStatistics(options.collectStatistics()) .enableSchemaEvolution(options.enableSchemaEvolution()) .schemaEvolutionCapabilitySet(userCapabilitySet) + .enableConcurrentSafety(true) .build(); IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java index 3a8ae2b9291..779e71dcac5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.io.IOException; @@ -25,6 +26,7 @@ public class UnitemporalConcurrentTest extends BaseTest { + @Disabled @Test public void test() throws InterruptedException, IOException { @@ -51,7 +53,7 @@ public void test() throws InterruptedException, IOException t3.start(); // Sleep for a while for tests to finish - Thread.sleep(5000); + Thread.sleep(10000); List> tableData = h2Sink.executeQuery(String.format("select * from \"TEST\".\"%s\"", "main")); Assertions.assertEquals(5, tableData.size()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index 30e8b98d60f..d618993e983 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -385,6 +385,7 @@ void testAppendOnlyDoNotCreateTables() throws Exception .ingestMode(ingestMode) .relationalSink(H2Sink.get()) .createDatasets(false) + .enableConcurrentSafety(true) .build(); try { @@ -393,7 +394,7 @@ void testAppendOnlyDoNotCreateTables() throws Exception } catch (Exception e) { - Assertions.assertTrue(e.getMessage().contains("Table \"main\" not found")); + Assertions.assertTrue(e.getMessage().contains("not found")); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTestCases.java index ca7b61e48d1..fc5527c2c54 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromAndThroughTestCases.java @@ -41,6 +41,7 @@ void testBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits() .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTestCases.java index f844d1983a3..27d55c2715c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTestCases.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/bitemporal/BitemporalDeltaSourceSpecifiesFromTestCases.java @@ -39,6 +39,7 @@ void testBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits() .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyBitemporalDeltaBatchIdBasedNoDeleteIndNoDataSplits(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java index 6012ee5956a..a76656c6870 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/AppendOnlyTestCases.java @@ -45,6 +45,8 @@ void testAppendOnlyAllowDuplicatesNoAuditing() .relationalSink(getRelationalSink()) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) + .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = 
generator.generateOperations(scenario.getDatasets()); verifyAppendOnlyAllowDuplicatesNoAuditing(operations); @@ -59,6 +61,8 @@ void testAppendOnlyAllowDuplicatesNoAuditingDeriveMainSchema() .relationalSink(getRelationalSink()) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) + .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyAppendOnlyAllowDuplicatesNoAuditing(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java index 4dca6ffad81..87a69bf5da6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalDeltaTestCases.java @@ -44,6 +44,8 @@ void testNontemporalDeltaNoAuditingNoDataSplit() .relationalSink(getRelationalSink()) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) + .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java index 1f371100174..4a81ebce332 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java @@ -49,6 +49,8 @@ void testNontemporalSnapshotNoAuditingNoDataSplit() .relationalSink(getRelationalSink()) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) + .executionTimestampClock(fixedClock_2000_01_01) .build(); GeneratorResult operations = generator.generateOperations(testScenario.getDatasets()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java index 1687cbd1954..fd0a9b3593c 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalDeltaBatchIdBasedTestCases.java @@ -48,6 +48,7 @@ void testUnitemporalDeltaNoDeleteIndNoDataSplits() .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalDeltaNoDeleteIndNoAuditing(operations); @@ -113,6 +114,7 @@ void testUnitemporalDeltaWithUpperCaseOptimizer() .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .collectStatistics(true) + .enableConcurrentSafety(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalDeltaWithUpperCaseOptimizer(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java index 8e20f25026c..395bac86ebb 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java @@ -45,6 +45,7 @@ void testUnitemporalSnapshotWithoutPartitionNoDataSplits() .executionTimestampClock(fixedClock_2000_01_01) .collectStatistics(true) .createStagingDataset(true) + .enableConcurrentSafety(true) .build(); GeneratorResult operations = generator.generateOperations(scenario.getDatasets()); verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(operations); From 5b6e9793683407b01b0737f03a6008239d3121b2 Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Fri, 18 Aug 2023 16:39:11 +0800 Subject: [PATCH 08/57] Update readme and code review comments --- .../README.md | 119 +++++++++++++----- .../datasets/DatasetCaseConverter.java | 1 + .../planner/BitemporalDeltaPlanner.java | 4 +- .../relational/api/IngestStatus.java | 2 +- .../nontemporal/AppendOnlyTest.java | 3 +- .../relational/snowflake/SnowflakeSink.java | 2 +- 6 files changed, 95 insertions(+), 36 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md index e3c5c3e7a14..29720d80319 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md @@ -25,6 +25,7 @@ The following tests are available for verifying the logic : * Tests to verify the execution of the generated SQL's in an H2 executor (module : 
*legend-engine-xt-persistence-component-relational-h2*) * Tests to verify the SQL's generated for Snowflake (module : *legend-engine-xt-persistence-component-relational-snowflake*) * Tests to verify the SQL's generated for Memsql (module : *legend-engine-xt-persistence-component-relational-memsql*) +* Tests to verify the SQL's generated for BigQuery (module : *legend-engine-xt-persistence-component-relational-bigquery*) ## Using it as a library @@ -90,8 +91,9 @@ the latest version from Maven Central. // Or provide main, staging, temp and tempWithDeleteIndicator dataset (used only for bitemporal delta ingest mode) Datasets datasets = Datasets.of(mainTable, stagingTable).withTempDataset(tempDataset).withTempDatasetWithDeleteIndicator(tempDatasetWithDeleteIndicator); -**Step 4:** To extract the SQLs needed for ingestion, follow steps 4.1 and 4.2. -If you do not need the SQL and want to use the executor to execute the SQLs for you, skip this step and jump to Step 5. +**Step 4:** The library provides two modes - Generator mode and Executor mode. +- Generator mode: Provides the SQLs needed for ingestion. The user is expected to run these sql in proper order to perform the ingestion. To use this mode, follow steps 4.1 and 4.2. +- Executor mode: Here the library provides the methods for end to end ingestion. This mode internally uses the generator mode to generate the sqls and then runs them in correct order. To use this mode, skip this step and jump to Step 5. 
**Step 4.1:** Define a RelationalGenerator @@ -110,46 +112,55 @@ Mandatory Params: Optional Params: -| parameters | Description | Default Value | -|--------------------|----------------------------------------------------------------------------------------------------------------------------------------------------|-------------------| -| cleanupStagingData | clean staging table after completion of ingestion | true | -| collectStatistics | Collect Statistics from ingestion | false | -| enableSchemaEvolution | Enable Schema Evolution to happen | false | -| caseConversion | Convert SQL objects like table, db, column names to upper or lower case.
Values supported - TO_UPPER, TO_LOWER, NONE | NONE | -| executionTimestampClock | Clock to use to derive the time | Clock.systemUTC() | -| batchStartTimestampPattern | Pattern for batchStartTimestamp. If this pattern is provided, it will replace the batchStartTimestamp values | None | -| batchEndTimestampPattern | Pattern for batchEndTimestamp. If this pattern is provided, it will replace the batchEndTimestamp values | None | -| batchIdPattern | Pattern for batch id. If this pattern is provided, it will replace the next batch id | None | -| createStagingDataset | Enables creation of staging Dataset | false | -| schemaEvolutionCapabilitySet | A set that enables fine grained schema evolution capabilities - ADD_COLUMN, DATA_TYPE_CONVERSION, DATA_TYPE_SIZE_CHANGE, COLUMN_NULLABILITY_CHANGE | Empty set | -| infiniteBatchIdValue | Value to be used for Infinite batch id | 999999999 | - +| parameters | Description | Default Value | +|--------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------| +| cleanupStagingData | clean staging table after completion of ingestion | true | +| collectStatistics | Collect Statistics from ingestion | false | +| enableSchemaEvolution | Enable Schema Evolution to happen | false | +| caseConversion | Convert SQL objects like table, db, column names to upper or lower case.
Values supported - TO_UPPER, TO_LOWER, NONE | NONE | +| executionTimestampClock | Clock to use to derive the time | Clock.systemUTC() | +| batchStartTimestampPattern | Pattern for batchStartTimestamp. If this pattern is provided, it will replace the batchStartTimestamp values | None | +| batchEndTimestampPattern | Pattern for batchEndTimestamp. If this pattern is provided, it will replace the batchEndTimestamp values | None | +| batchIdPattern | Pattern for batch id. If this pattern is provided, it will replace the next batch id | None | +| createStagingDataset | Enables creation of staging Dataset | false | +| schemaEvolutionCapabilitySet | A set that enables fine grained schema evolution capabilities - ADD_COLUMN, DATA_TYPE_CONVERSION, DATA_TYPE_SIZE_CHANGE, COLUMN_NULLABILITY_CHANGE | Empty set | +| infiniteBatchIdValue | Value to be used for Infinite batch id | 999999999 | +| enableConcurrentSafety | Enables safety for concurrent ingestion on the same table. If enabled, the library creates a special lock table to block other concurrent ingestion on the same table | false | **Step 4.2:** Use the generator object to extract the queries GeneratorResult operations = generator.generateOperations(datasets); List preActionsSql = operations.preActionsSql(); // Pre actions: create tables + List initializeLockSql = operations.initializeLockSql(); // Initialize the lock table Map preIngestStatisticsSql = operations.preIngestStatisticsSql(); // Pre Ingest stats + List acquireLockSql = operations.acquireLockSql(); // Acquire Lock List ingestSql = operations.ingestSql(); // milestoning sql Map postIngestStatisticsSql = operations.postIngestStatisticsSql(); // post Ingest stats List metadataIngestSql = operations.metadataIngestSql(); // insert batch into metadata table List postActionsSql = operations.postActionsSql(); // post actions cleanup + List postCleanupSql = operations.postCleanupSql(); // drop temporal tables if any NOTE 1: These queries must be strictly run in the 
order shown below. 1. preActionsSql - Creates tables -2. preIngestStatisticsSql - Collects pre ingest stats -3. ingestSql - Performs ingest/milestoning -4. postIngestStatisticsSql - Collects post ingest stats -5. metadataIngestSql - Inserts batch Id into metadata table -6. postActionsSql - Does clean up +2. initializeLockSql - Initialize the lock table +3. preIngestStatisticsSql - Collects pre ingest stats +4. acquireLockSql - Acquire a lock using the lock table +5. ingestSql - Performs ingest/milestoning +6. postIngestStatisticsSql - Collects post ingest stats +7. metadataIngestSql - Inserts batch Id into metadata table +8. postActionsSql - Does clean up +9. postCleanupSql - Drop the temporary tables if any -NOTE 2: Statistics provided: -1) INCOMING_RECORD_COUNT -2) ROWS_TERMINATED -3) ROWS_INSERTED -4) ROWS_UPDATED -5) ROWS_DELETED +Note that step 4 to step 8 must run in a single transaction. +NOTE 2: Statistics provided: +1) INCOMING_RECORD_COUNT - Number of incoming rows in staging table in the current batch +2) ROWS_TERMINATED - Number of rows marked for deletion in the current batch +3) ROWS_INSERTED - Number of rows inserted in the current batch +4) ROWS_UPDATED - Number of rows updated in the current batch +5) ROWS_DELETED - Number of rows physically deleted in the current batch +6) FILES_LOADED - Number of files loaded - only provided with BulkLoad +7) ROWS_WITH_ERRORS - Number of rows with error while Bulk Loading - only provided with BulkLoad **Step 5:** To use the executor to perform the ingestion for you, follow the steps in step 5. Skip this step if you just want the SQLs. 
@@ -182,11 +193,59 @@ Optional Params: | createDatasets | A flag to enable or disable dataset creation in Executor mode | true | | createStagingDataset | Enables creation of staging Dataset | false | | schemaEvolutionCapabilitySet | A set that enables fine grained schema evolution capabilities - ADD_COLUMN, DATA_TYPE_CONVERSION, DATA_TYPE_SIZE_CHANGE, COLUMN_NULLABILITY_CHANGE | Empty set | +| enableConcurrentSafety | Enables safety for concurrent ingestion on the same table. If enabled, the library creates a special lock table to block other concurrent ingestion on the same table | false | + +**Step 5.2:** Ingestor mode provides two different types of APIs : "Perform Full ingestion API" and "Granular APIs" + +1. **Perform Full ingestion API** - This api performs end to end ingestion that involves table creation, schema evolution, ingestion and cleanup. + +` IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); + Map stats = result.statisticByName();` + +2. **Granular APIs** - Set of APIs that provides user ability to run these individual pieces themselves + + - **init** : `public Executor init(RelationalConnection connection)` + - This api initializes the executor and returns it back to the user. 
The users can use the executor to control when to begin/start transaction or run their own queries within the transaction + + - **create** : `public Datasets create(Datasets datasets)` + - This api will create all the required tables and returns the enriched datasets + + - **evolve** : `public Datasets evolve(Datasets datasets)` + - This api will perform the schema evolution on main dataset based on changes in schema of Staging dataset + + - **ingest** : `public IngestorResult ingest(Datasets datasets)` + - This api will perform the ingestion based on selected Ingest mode and returns the Ingestion result + + - **cleanup** : `public Datasets cleanUp(Datasets datasets)` + - This api will drop the temporary tables if they were created during the ingestion + +Example: + + Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); + datasets = ingestor.create(datasets); + datasets = ingestor.evolve(datasets); + + executor.begin(); + IngestorResult result = ingestor.ingest(datasets); + // Do more stuff if needed + executor.commit(); + + datasets = ingestor.cleanup(datasets); + -**Step 5.2:** Invoke the ingestion and get the stats +## Ingestion Result: +Ingestion result provides these fields: - IngestorResult result = ingestor.ingest(h2Sink.connection(), datasets); - Map stats = result.statisticByName(); +| Field Name | Description | +|----------------|-----------------------------------------------------------------------------------------------------------------------------| +| batchId | Batch id generated for the batch. It is an optional field only generated for temporal Ingest modes | +| dataSplitRange | This provides the List of dataSplitRange in the staging datasets. This is an optional field returned when we use datasplits | +| statisticByName | The statistics generated by the ingestion. 
The detailed statistics are provided in step 4.2 | +| updatedDatasets | The enriched datasets | +| schemaEvolutionSql | If schema evolution is enabled, this field will return the schema evolutions sqls those were trigerred if any | +| status | Ingestion status enum - SUCCEDED or FAILED | +| message | Any message generated during the ingestion | +| ingestionTimestampUTC | This returns the ingestion timestamp in UTC | ## Ingest Modes diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java index 3803820d032..9f118acdeeb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java @@ -159,6 +159,7 @@ public LockInfoDataset applyCaseOnLockInfoDataset(LockInfoDataset lockInfoDatase .name(strategy.apply(lockInfoDataset.name())) .insertTimeField(strategy.apply(lockInfoDataset.insertTimeField())) .lastUsedTimeField(strategy.apply(lockInfoDataset.lastUsedTimeField())) + .tableNameField(strategy.apply(lockInfoDataset.tableNameField())) .build(); } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java index e32437d2795..d3b06d47a4c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java @@ -348,9 +348,9 @@ public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) { operations.add(Drop.of(true, tempDatasetWithDeleteIndicator, true)); } - if (ingestMode().deduplicationStrategy() instanceof FilterDuplicates && stagingDatasetWithoutDuplicates.isPresent()) + if (ingestMode().deduplicationStrategy() instanceof FilterDuplicates) { - operations.add(Drop.of(true, stagingDatasetWithoutDuplicates.get(), true)); + operations.add(Drop.of(true, stagingDatasetWithoutDuplicates.orElseThrow(IllegalStateException::new), true)); } } return LogicalPlan.of(operations); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/IngestStatus.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/IngestStatus.java index c3eba21fcff..40009264af7 
100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/IngestStatus.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/IngestStatus.java @@ -16,5 +16,5 @@ public enum IngestStatus { - SUCCEEDED, ERROR + SUCCEEDED, FAILED } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java index d618993e983..30e8b98d60f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/nontemporal/AppendOnlyTest.java @@ -385,7 +385,6 @@ void testAppendOnlyDoNotCreateTables() throws Exception .ingestMode(ingestMode) .relationalSink(H2Sink.get()) .createDatasets(false) - .enableConcurrentSafety(true) .build(); try { @@ -394,7 +393,7 @@ void testAppendOnlyDoNotCreateTables() throws Exception } catch (Exception e) { - Assertions.assertTrue(e.getMessage().contains("not found")); + Assertions.assertTrue(e.getMessage().contains("Table \"main\" not found")); } } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 686c3c9e44e..6cae48022f0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -271,7 +271,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor Date: Fri, 18 Aug 2023 16:44:55 +0800 Subject: [PATCH 09/57] Fix typo --- .../components/ingestmode/mixed/UnitemporalConcurrentTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java index 779e71dcac5..8df2bab20fc 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/mixed/UnitemporalConcurrentTest.java @@ -46,7 +46,7 @@ public void test() throws InterruptedException, IOException Thread t2 = new Thread(r2); t2.start(); - // Thread 2 + // Thread 3 String path3 = "src/test/resources/data/unitemporal-incremental-milestoning/input/batch_id_and_time_based/without_delete_ind/staging_data_pass3.csv"; Runnable r3 = new UnitemporalDeltaRunner(path3, "_thread3", H2_USER_NAME, H2_PASSWORD, H2_JDBC_URL, fixedClock_2000_01_01, maxBatchIdCounter); Thread t3 = new Thread(r3); From 30c64f405fc60cc730edec6d29a5a522f79b1e16 Mon Sep 17 00:00:00 2001 From: Ashutosh Date: Fri, 18 Aug 2023 17:04:55 +0800 Subject: [PATCH 10/57] Fix typos in readme --- .../legend-engine-xt-persistence-component/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md index 29720d80319..d7e9a1954ed 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/README.md @@ -138,7 +138,7 @@ Optional Params: Map postIngestStatisticsSql = operations.postIngestStatisticsSql(); // post Ingest stats List metadataIngestSql = operations.metadataIngestSql(); // insert batch into metadata table List postActionsSql = operations.postActionsSql(); // post actions cleanup - List postCleanupSql = operations.postCleanupSql(); // drop temporal tables if any + List 
postCleanupSql = operations.postCleanupSql(); // drop temporary tables if any NOTE 1: These queries must be strictly run in the order shown below. 1. preActionsSql - Creates tables @@ -241,8 +241,8 @@ Ingestion result provides these fields: | batchId | Batch id generated for the batch. It is an optional field only generated for temporal Ingest modes | | dataSplitRange | This provides the List of dataSplitRange in the staging datasets. This is an optional field returned when we use datasplits | | statisticByName | The statistics generated by the ingestion. The detailed statistics are provided in step 4.2 | -| updatedDatasets | The enriched datasets | -| schemaEvolutionSql | If schema evolution is enabled, this field will return the schema evolutions sqls those were trigerred if any | +| updatedDatasets | The enriched and evolved (if enabled) datasets | +| schemaEvolutionSql | If schema evolution is enabled, this field will return the schema evolution sqls which were trigerred | | status | Ingestion status enum - SUCCEDED or FAILED | | message | Any message generated during the ingestion | | ingestionTimestampUTC | This returns the ingestion timestamp in UTC | From 32630409d10ee2c687bc56b4512d37adb731875a Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 21 Aug 2023 16:56:29 +0800 Subject: [PATCH 11/57] Bug Fix: Empty Batch Handling in Unitemp Snapshot --- .../UnitemporalSnapshotAbstract.java | 21 +++++++ .../planner/UnitemporalSnapshotPlanner.java | 22 ++++++- ...poralSnapshotBatchIdDateTimeBasedTest.java | 31 ++++++++++ ...poralSnapshotBatchIdDateTimeBasedTest.java | 31 ++++++++++ .../unitemporal/UnitemporalSnapshotTest.java | 2 +- .../UnitemporalSnapshotWithBatchIdTest.java | 4 +- .../UnitemporalSnapshotWithBatchTimeTest.java | 2 +- .../with_partition/expected_pass3.csv | 12 ++-- .../with_partition/expected_pass3.csv | 14 ++--- .../with_partition_filter/expected_pass3.csv | 2 +- .../with_partition/expected_pass3.csv | 12 ++-- 
...poralSnapshotBatchIdDateTimeBasedTest.java | 32 +++++++++++ ...SnapshotBatchIdDateTimeBasedTestCases.java | 57 +++++++++++++++++++ 13 files changed, 217 insertions(+), 25 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java index 323ee26689a..ac1c1f1c8c0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoned; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoning; +import org.immutables.value.Value; import java.util.List; import java.util.Map; @@ -55,4 +56,24 @@ default T accept(IngestModeVisitor visitor) { return visitor.visitUnitemporalSnapshot(this); } + + @Value.Check + default void validate() + { + // All the keys in partitionValuesByField must be present in partitionFields + if (!partitionValuesByField().isEmpty()) + { + if (partitionFields().size() != partitionValuesByField().size()) + { + throw new IllegalStateException("Can not build UnitemporalSnapshot, size of partitionValuesByField must be same as partitionFields"); + } + for (String 
partitionKey: partitionValuesByField().keySet()) + { + if (!partitionFields().contains(partitionKey)) + { + throw new IllegalStateException(String.format("Can not build UnitemporalSnapshot, partitionKey: [%s] not specified in partitionFields", partitionKey)); + } + } + } + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index 15464149218..ec6bd751ba2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -53,6 +53,12 @@ class UnitemporalSnapshotPlanner extends UnitemporalPlanner if (ingestMode.partitioned()) { List fieldNames = stagingDataset().schema().fields().stream().map(Field::name).collect(Collectors.toList()); + // All partitionFields must be present in staging dataset + ingestMode.partitionFields().forEach(field -> validateExistence( + fieldNames, + field, + "Field [" + field + "] from partitionFields not present in incoming dataset")); + // All partitionValuesByField must be present in staging dataset ingestMode.partitionValuesByField().keySet().forEach(field -> validateExistence( fieldNames, field, @@ -75,6 +81,10 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set> values) "batch_id_out" = {TABLE_BATCH_ID} - 1, 
"batch_out_time" = {BATCH_TIME}" where "batch_id_out" = {MAX_BATCH_ID_VALUE} + // OPTIONAL : when partition values are provided + and sink.partition_key in [VALUE1, VALUE2, ...] */ protected Update sqlToMilestoneAllRows(List> values) { - return UpdateAbstract.of(mainDataset(), values, openRecordCondition); + List conditions = new ArrayList<>(); + conditions.add(openRecordCondition); + + // Handle Partition Values + if (ingestMode().partitioned() && !(ingestMode().partitionValuesByField().isEmpty())) + { + conditions.add(LogicalPlanUtils.getPartitionColumnValueMatchInCondition(mainDataset(), ingestMode().partitionValuesByField())); + } + return UpdateAbstract.of(mainDataset(), values, And.of(conditions)); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 7f933ab8f3c..01477779b23 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -125,6 +125,18 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, 
rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + + Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertTrue(milestoningSql.isEmpty()); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) { @@ -154,6 +166,25 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + + String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + + "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1,sink.\"batch_time_out\" = '2000-01-01 00:00:00' " + + "WHERE (sink.\"batch_id_out\" = 999999999) " + + "AND (sink.\"biz_date\" IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; + + Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalSnapshotWithCleanStagingData(GeneratorResult 
operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 32c87325830..cd672e1c725 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -123,6 +123,18 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(0, milestoningSql.size()); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void 
verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) { @@ -152,6 +164,25 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "WHERE (sink.`batch_id_out` = 999999999) " + + "AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; + + Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalSnapshotWithCleanStagingData(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java index 
f9c7cabe9b5..ecd80ab6ced 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java @@ -169,7 +169,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); // 2. Execute plans and verify results - expectedStats = createExpectedStatsMap(0, 0, 0, 0, 6); + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java index e47e5e79d89..18f78b2c47a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java @@ -157,7 +157,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); // 2. Execute plans and verify results - expectedStats = createExpectedStatsMap(0, 0, 0, 0, 7); + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); } @@ -213,7 +213,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartitionFilter() throws Excepti // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); // 2. Execute plans and verify results - expectedStats = createExpectedStatsMap(0, 0, 0, 0, 4); + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 3); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java index 3dde59c6b7f..2782e71cdab 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java @@ -156,7 +156,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); // 2. Execute plans and verify results - expectedStats = createExpectedStatsMap(0, 0, 0, 0, 6); + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_03); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass3.csv index bc74a0ac35b..93eee214bc5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/with_partition/expected_pass3.csv @@ -1,8 +1,8 @@ -2021-12-01,IBM,116.92,5958300,DIGEST1,1,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -2021-12-01,JPM,161.00,12253400,DIGEST2,1,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -2021-12-01,GS,383.82,2476000,DIGEST3,1,2,2000-01-01 
00:00:00.0,2000-01-01 00:00:00.0 -2021-12-02,IBM,117.37,5267100,DIGEST4,1,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-01,IBM,116.92,5958300,DIGEST1,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,12253400,DIGEST2,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,GS,383.82,2476000,DIGEST3,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,IBM,117.37,5267100,DIGEST4,1,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2021-12-02,JPMX,159.83,12969900,DIGEST5,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 2021-12-02,GS,37800.00,3343700,DIGEST6,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -2021-12-02,JPM,159.83,12969900,DIGEST7,2,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 -2021-12-02,GS,378.00,3343700,DIGEST8,2,2,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2021-12-02,JPM,159.83,12969900,DIGEST7,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,378.00,3343700,DIGEST8,2,999999999,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition/expected_pass3.csv index a839f32beec..356e9753a47 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition/expected_pass3.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition/expected_pass3.csv @@ -1,9 +1,9 @@ -2021-12-01,IBM,116.92,5958300,DIGEST1,1,2 -2021-12-01,JPM,161.00,12253400,DIGEST2,1,2 -2021-12-01,GS,383.82,2476000,DIGEST3,1,2 -2021-12-02,IBM,117.37,5267100,DIGEST4,1,2 +2021-12-01,IBM,116.92,5958300,DIGEST1,1,999999999 +2021-12-01,JPM,161.00,12253400,DIGEST2,1,999999999 +2021-12-01,GS,383.82,2476000,DIGEST3,1,999999999 +2021-12-02,IBM,117.37,5267100,DIGEST4,1,999999999 2021-12-02,JPMX,159.83,12969900,DIGEST5,1,1 2021-12-02,GS,37800.00,3343700,DIGEST6,1,1 -2021-12-02,JPM,159.83,12969900,DIGEST7,2,2 -2021-12-02,GS,378.00,3343700,DIGEST8,2,2 -2021-12-03,GS,379.00,3343700,DIGEST9,2,2 +2021-12-02,JPM,159.83,12969900,DIGEST7,2,999999999 +2021-12-02,GS,378.00,3343700,DIGEST8,2,999999999 +2021-12-03,GS,379.00,3343700,DIGEST9,2,999999999 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv index 745569d5067..a7b2313f70c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv @@ -6,4 +6,4 @@ 2021-12-02,GS,37800.00,3343700,DIGEST6,1,1 2021-12-02,JPM,159.83,12969900,DIGEST7,2,2 2021-12-02,GS,378.00,3343700,DIGEST8,2,2 -2021-12-03,GS,379.00,3343700,DIGEST9,2,2 +2021-12-03,GS,379.00,3343700,DIGEST9,2,999999999 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv index c22ab5dfc3a..e7232a18a95 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/with_partition/expected_pass3.csv @@ -1,8 +1,8 @@ -2021-12-01,IBM,116.92,5958300,DIGEST1,2000-01-01 00:00:00.0,2000-01-03 00:00:00.0 -2021-12-01,JPM,161.00,12253400,DIGEST2,2000-01-01 00:00:00.0,2000-01-03 00:00:00.0 -2021-12-01,GS,383.82,2476000,DIGEST3,2000-01-01 00:00:00.0,2000-01-03 00:00:00.0 -2021-12-02,IBM,117.37,5267100,DIGEST4,2000-01-01 00:00:00.0,2000-01-03 00:00:00.0 +2021-12-01,IBM,116.92,5958300,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,JPM,161.00,12253400,DIGEST2,2000-01-01 
00:00:00.0,9999-12-31 23:59:59.0 +2021-12-01,GS,383.82,2476000,DIGEST3,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,IBM,117.37,5267100,DIGEST4,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 2021-12-02,JPMX,159.83,12969900,DIGEST5,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 2021-12-02,GS,37800.00,3343700,DIGEST6,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 -2021-12-02,JPM,159.83,12969900,DIGEST7,2000-01-02 00:00:00.0,2000-01-03 00:00:00.0 -2021-12-02,GS,378.00,3343700,DIGEST8,2000-01-02 00:00:00.0,2000-01-03 00:00:00.0 +2021-12-02,JPM,159.83,12969900,DIGEST7,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 +2021-12-02,GS,378.00,3343700,DIGEST8,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 08b9c90e479..ed9f18e6c5d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -125,6 +125,19 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o verifyStats(operations, incomingRecordCount, rowsUpdated, rowsDeleted, rowsInserted, rowsTerminated); } + @Override + public void 
verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + + Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + Assertions.assertEquals(0, milestoningSql.size()); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations) { @@ -154,6 +167,25 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); } + @Override + public void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations) + { + List preActionsSql = operations.preActionsSql(); + List milestoningSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + + String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + + "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1,sink.`batch_time_out` = '2000-01-01 00:00:00' " + + "WHERE (sink.`batch_id_out` = 999999999) " + + "AND (sink.`biz_date` IN ('2000-01-01 00:00:00','2000-01-02 00:00:00'))"; + + Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableCreateQuery, preActionsSql.get(0)); + Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); + + Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); + } + @Override public void 
verifyUnitemporalSnapshotWithCleanStagingData(GeneratorResult operations) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java index 97d7a94457f..f14a1da0d72 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java @@ -102,6 +102,22 @@ void testUnitemporalSnapshotWithPartitionNoDataSplits() public abstract void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations); + @Test + void testUnitemporalSnapshotWithPartitionForEmptyBatch() + { + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .build(); + GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); + verifyUnitemporalSnapshotWithPartitionForEmptyBatch(operations); + } + + public abstract 
void verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations); + @Test void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() { @@ -118,6 +134,22 @@ void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations); + @Test + void testUnitemporalSnapshotWithPartitionFiltersForEmptyBatch() + { + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(scenario.getIngestMode()) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .build(); + GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); + verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(operations); + } + + public abstract void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations); + @Test void testUnitemporalSnapshotWithCleanStagingData() { @@ -219,5 +251,30 @@ void testUnitemporalSnapshotValidationBatchIdInNotPrimaryKey() } } + @Test + void testUnitemporalSnapshotPartitionKeysValidation() + { + try + { + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .dateTimeInName(batchTimeInField) + .dateTimeOutName(batchTimeOutField) + .build()) + .addAllPartitionFields(Arrays.asList("business_date")) + .putAllPartitionValuesByField(partitionFilter) + .build(); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertEquals("Can not build UnitemporalSnapshot, partitionKey: [biz_date] not specified in partitionFields", e.getMessage()); + } + } + + public abstract RelationalSink getRelationalSink(); } From 
50faeaef717d831e9970e8f43c51b84ccd8938b5 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 21 Aug 2023 21:32:14 +0800 Subject: [PATCH 12/57] Bug Fix: Code review comments --- .../components/ingestmode/UnitemporalSnapshotAbstract.java | 2 +- .../components/planner/UnitemporalSnapshotPlanner.java | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java index ac1c1f1c8c0..351d0a0ac26 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java @@ -60,7 +60,7 @@ default T accept(IngestModeVisitor visitor) @Value.Check default void validate() { - // All the keys in partitionValuesByField must be present in partitionFields + // All the keys in partitionValuesByField must exactly match the fields in partitionFields if (!partitionValuesByField().isEmpty()) { if (partitionFields().size() != partitionValuesByField().size()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index ec6bd751ba2..27812f4319b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -58,11 +58,6 @@ class UnitemporalSnapshotPlanner extends UnitemporalPlanner fieldNames, field, "Field [" + field + "] from partitionFields not present in incoming dataset")); - // All partitionValuesByField must be present in staging dataset - ingestMode.partitionValuesByField().keySet().forEach(field -> validateExistence( - fieldNames, - field, - "Field [" + field + "] from partitionValuesByField not present in incoming dataset")); } } @@ -80,7 +75,7 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set operations = new ArrayList<>(); if (resources.stagingDataSetEmpty()) { - // Step 1: Milestone all Records in main table + // Step 1: Empty Batch Handling: Milestone (all/all within partition) records in main table if (ingestMode().partitioned() && ingestMode().partitionValuesByField().isEmpty()) { return LogicalPlan.of(operations); From 7d29ffdff8133f5aef58135b1df3f8b30b2dc80b Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 23 Aug 2023 15:07:03 +0800 Subject: [PATCH 13/57] Implement StagedFilesDatasetReference --- .../datasets/StagedFilesDatasetAbstract.java | 6 +++- .../StagedFilesDatasetProperties.java | 7 +--- .../StagedFilesDatasetReferenceAbstract.java | 34 +++++++++++++++++++ .../relational/snowflake/SnowflakeSink.java | 8 ++--- 
...> StagedFilesDatasetReferenceVisitor.java} | 14 ++++---- 5 files changed, 50 insertions(+), 19 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetReferenceAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/{StagedFilesDatasetPropertiesVisitor.java => StagedFilesDatasetReferenceVisitor.java} (82%) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetAbstract.java index 5189bb522c8..002049cda25 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetAbstract.java @@ -60,6 +60,10 @@ default SchemaReference schemaReference() @Value.Derived default DatasetReference datasetReference() { - return this.stagedFilesDatasetProperties().withAlias(alias()); + return StagedFilesDatasetReference.builder() + .properties(this.stagedFilesDatasetProperties()) + 
.addAllColumns(this.schema().fields().stream().map(field -> field.name()).collect(Collectors.toList())) + .alias(alias()) + .build(); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java index bcf38bd154f..c2344edfb45 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetProperties.java @@ -16,12 +16,7 @@ import java.util.List; -public interface StagedFilesDatasetProperties extends DatasetReference +public interface StagedFilesDatasetProperties { List files(); - - default StagedFilesDatasetProperties datasetReference() - { - return this; - } } \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetReferenceAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetReferenceAbstract.java new file mode 100644 index 00000000000..fa655f34d60 --- 
/dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesDatasetReferenceAbstract.java @@ -0,0 +1,34 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.logicalplan.datasets; + +import org.immutables.value.Value; + +import java.util.List; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface StagedFilesDatasetReferenceAbstract extends DatasetReference +{ + StagedFilesDatasetProperties properties(); + + List<String> columns(); +} \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 6cae48022f0..fd0a6f9f693 100644 ---
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -25,8 +25,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetAdditionalProperties; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; -import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.operations.Alter; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; @@ -58,7 +57,7 @@ import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.ShowVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.DatasetAdditionalPropertiesVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.CopyVisitor; -import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesDatasetPropertiesVisitor; +import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesDatasetReferenceVisitor; import 
org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesDatasetVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor.DigestUdfVisitor; @@ -120,8 +119,7 @@ public class SnowflakeSink extends AnsiSqlSink logicalPlanVisitorByClass.put(Field.class, new FieldVisitor()); logicalPlanVisitorByClass.put(DatasetAdditionalProperties.class, new DatasetAdditionalPropertiesVisitor()); logicalPlanVisitorByClass.put(Copy.class, new CopyVisitor()); - logicalPlanVisitorByClass.put(StagedFilesDatasetProperties.class, new StagedFilesDatasetPropertiesVisitor()); - logicalPlanVisitorByClass.put(SnowflakeStagedFilesDatasetProperties.class, new StagedFilesDatasetPropertiesVisitor()); + logicalPlanVisitorByClass.put(StagedFilesDatasetReference.class, new StagedFilesDatasetReferenceVisitor()); logicalPlanVisitorByClass.put(StagedFilesDataset.class, new StagedFilesDatasetVisitor()); logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetPropertiesVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetReferenceVisitor.java similarity index 82% rename from 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetPropertiesVisitor.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetReferenceVisitor.java index b9588edaa0d..ccb15d743cb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetPropertiesVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -14,29 +14,29 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; -import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.snowflake.logicalplan.datasets.SnowflakeStagedFilesDatasetProperties; import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.expressions.table.StagedFilesTable; import 
org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; import java.util.stream.Collectors; -public class StagedFilesDatasetPropertiesVisitor implements LogicalPlanVisitor +public class StagedFilesDatasetReferenceVisitor implements LogicalPlanVisitor { @Override - public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetProperties current, VisitorContext context) + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference current, VisitorContext context) { - if (!(current instanceof SnowflakeStagedFilesDatasetProperties)) + if (!(current.properties() instanceof SnowflakeStagedFilesDatasetProperties)) { throw new IllegalStateException("Only SnowflakeStagedFilesDatasetProperties are supported for Snowflake Sink"); } - SnowflakeStagedFilesDatasetProperties datasetProperties = (SnowflakeStagedFilesDatasetProperties) current; + SnowflakeStagedFilesDatasetProperties datasetProperties = (SnowflakeStagedFilesDatasetProperties) current.properties(); StagedFilesTable stagedFiles = new StagedFilesTable(datasetProperties.location()); datasetProperties.fileFormat().ifPresent(stagedFiles::setFileFormat); stagedFiles.setFilePattern(datasetProperties.files().stream().map(s -> '(' + s + ')').collect(Collectors.joining("|"))); - datasetProperties.alias().ifPresent(stagedFiles::setAlias); + current.alias().ifPresent(stagedFiles::setAlias); prev.push(stagedFiles); return new VisitorResult(null); } From 136793fe7893b9a700dd2b7e3efbb9a90d62f95a Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 23 Aug 2023 15:39:07 +0800 Subject: [PATCH 14/57] Implement StagedFilesSelection --- .../StagedFilesSelectionAbstract.java | 49 +++++++++++++++++++ .../components/planner/BulkLoadPlanner.java | 4 +- .../relational/snowflake/SnowflakeSink.java | 3 ++ .../visitor/StagedFilesSelectionVisitor.java | 44 +++++++++++++++++ 4 files changed, 98 insertions(+), 2 deletions(-) 
create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesSelectionVisitor.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java new file mode 100644 index 00000000000..60efdfb69f7 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java @@ -0,0 +1,49 @@ +// Copyright 2022 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.logicalplan.datasets; + +import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.immutables.value.Value.Derived; +import org.immutables.value.Value.Immutable; +import org.immutables.value.Value.Style; + +import java.util.List; +import java.util.Optional; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface StagedFilesSelectionAbstract extends Dataset, Operation +{ + List fields(); + + StagedFilesDataset source(); + + Optional alias(); + + @Derived + default DatasetReference datasetReference() + { + return DatasetReferenceImpl.builder() + .alias(alias()) + .build(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index dae4d51dd6c..2780ef791a4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -21,6 +21,7 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import 
org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.logicalplan.values.All; @@ -104,8 +105,7 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesSelection current, VisitorContext context) + { + SelectStatement selectStatement = new SelectStatement(); + current.alias().ifPresent(selectStatement::setAlias); + prev.push(selectStatement); + + List logicalPlanNodeList = new ArrayList<>(); + logicalPlanNodeList.add(current.source().datasetReference()); + logicalPlanNodeList.addAll(current.fields()); + selectStatement.setSelectItemsSize((long) current.fields().size()); + + return new VisitorResult(selectStatement, logicalPlanNodeList); + } +} From bb9b3adbb53cef1a7c9d75bffed05de103963b60 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Wed, 23 Aug 2023 17:28:37 +0800 Subject: [PATCH 15/57] Support for Empty Batch Handling in Unitemporal Snapshot --- .../UnitemporalSnapshotAbstract.java | 8 ++ .../handling/DeleteTargetDataAbstract.java | 35 +++++++++ .../handling/EmptyDatasetHandling.java | 20 +++++ .../handling/EmptyDatasetHandlingVisitor.java | 22 ++++++ .../ingestmode/handling/NoOpAbstract.java | 35 +++++++++ .../planner/UnitemporalSnapshotPlanner.java | 46 +++++++++--- .../UnitemporalSnapshotBatchIdBasedTest.java | 8 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 6 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 2 +- .../UnitemporalSnapshotBatchIdBasedTest.java | 8 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 6 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 2 +- 
.../unitemporal/UnitemporalSnapshotTest.java | 2 + .../UnitemporalSnapshotWithBatchIdTest.java | 26 ++++++- .../UnitemporalSnapshotWithBatchTimeTest.java | 8 +- .../with_partition_filter/expected_pass3.csv | 6 +- .../without_partition/expected_pass3.csv | 8 +- .../UnitemporalSnapshotBatchIdBasedTest.java | 8 +- ...poralSnapshotBatchIdDateTimeBasedTest.java | 6 +- .../UnitemporalSnapshotDateTimeBasedTest.java | 2 +- ...temporalSnapshotBatchIdBasedScenarios.java | 2 + ...SnapshotBatchIdDateTimeBasedScenarios.java | 3 + ...memporalSnapshotBatchIdBasedTestCases.java | 6 +- ...SnapshotBatchIdDateTimeBasedTestCases.java | 74 ++++++++++++++++--- ...emporalSnapshotDateTimeBasedTestCases.java | 7 +- 25 files changed, 288 insertions(+), 68 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/DeleteTargetDataAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandling.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandlingVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/NoOpAbstract.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java index 351d0a0ac26..94dd1629914 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.ingestmode; +import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; +import org.finos.legend.engine.persistence.components.ingestmode.handling.EmptyDatasetHandling; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoned; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoning; import org.immutables.value.Value; @@ -51,6 +53,12 @@ default boolean partitioned() return !partitionFields().isEmpty(); } + @Value.Default + default EmptyDatasetHandling emptyDatasetHandling() + { + return DeleteTargetData.builder().build(); + } + @Override default T accept(IngestModeVisitor visitor) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/DeleteTargetDataAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/DeleteTargetDataAbstract.java new file mode 100644 index 00000000000..33433e30af2 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/DeleteTargetDataAbstract.java @@ -0,0 +1,35 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.handling; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface DeleteTargetDataAbstract extends EmptyDatasetHandling +{ + @Override + default <T> T accept(EmptyDatasetHandlingVisitor<T> visitor) + { + return visitor.visitDeleteTargetDataset(this); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandling.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandling.java new file mode 100644 index 00000000000..1123e1e4106 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandling.java @@ -0,0 +1,20 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package org.finos.legend.engine.persistence.components.ingestmode.handling; + +public interface EmptyDatasetHandling +{ + <T> T accept(EmptyDatasetHandlingVisitor<T> visitor); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandlingVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandlingVisitor.java new file mode 100644 index 00000000000..749e61b12c0 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandlingVisitor.java @@ -0,0 +1,22 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.handling; + +public interface EmptyDatasetHandlingVisitor<T> +{ + T visitNoOp(NoOpAbstract noOpAbstract); + + T visitDeleteTargetDataset(DeleteTargetDataAbstract deleteTargetDataAbstract); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/NoOpAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/NoOpAbstract.java new file mode 100644 index 00000000000..c1c9d358e30 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/NoOpAbstract.java @@ -0,0 +1,35 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.handling; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface NoOpAbstract extends EmptyDatasetHandling +{ + @Override + default <T> T accept(EmptyDatasetHandlingVisitor<T> visitor) + { + return visitor.visitNoOp(this); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index 27812f4319b..615e0fc9de5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -17,6 +17,9 @@ import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetDataAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.handling.EmptyDatasetHandlingVisitor; +import 
org.finos.legend.engine.persistence.components.ingestmode.handling.NoOpAbstract; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; @@ -72,25 +75,20 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set> keyValuePairs = keyValuesForMilestoningUpdate(); - List operations = new ArrayList<>(); if (resources.stagingDataSetEmpty()) { - // Step 1: Empty Batch Handling: Milestone (all/all within partition) records in main table - if (ingestMode().partitioned() && ingestMode().partitionValuesByField().isEmpty()) - { - return LogicalPlan.of(operations); - } - operations.add(sqlToMilestoneAllRows(keyValuePairs)); + // Empty Dataset handling + return ingestMode().emptyDatasetHandling().accept(new EmptyDatasetHandler(keyValuePairs)); } else { + List operations = new ArrayList<>(); // Step 1: Milestone Records in main table operations.add(getSqlToMilestoneRows(keyValuePairs)); // Step 2: Insert records in main table operations.add(sqlToUpsertRows()); + return LogicalPlan.of(operations); } - - return LogicalPlan.of(operations); } @Override @@ -250,4 +248,34 @@ protected Update sqlToMilestoneAllRows(List> values) } return UpdateAbstract.of(mainDataset(), values, And.of(conditions)); } + + + private class EmptyDatasetHandler implements EmptyDatasetHandlingVisitor + { + List> keyValuePairs; + + public EmptyDatasetHandler(List> keyValuePairs) + { + this.keyValuePairs = keyValuePairs; + } + + @Override + public LogicalPlan visitNoOp(NoOpAbstract noOpAbstract) + { + List operations = new ArrayList<>(); + return LogicalPlan.of(operations); + } + + @Override + public LogicalPlan visitDeleteTargetDataset(DeleteTargetDataAbstract deleteTargetDataAbstract) + { + List operations = new ArrayList<>(); + if (ingestMode().partitioned() && 
ingestMode().partitionValuesByField().isEmpty()) + { + return LogicalPlan.of(operations); + } + operations.add(sqlToMilestoneAllRows(keyValuePairs)); + return LogicalPlan.of(operations); + } + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java index 5abcbe0e34b..b30f182ed29 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdBasedTest.java @@ -72,19 +72,15 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE \"mydb\".\"main\" as sink " + - "SET sink.\"batch_id_out\" = (SELECT COALESCE(MAX(batch_metadata.\"table_batch_id\"),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.\"table_name\") = 'MAIN')-1 " + - 
"WHERE sink.\"batch_id_out\" = 999999999"; - Assertions.assertEquals(AnsiTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(getExpectedMetadataTableCreateQuery(), preActionsSql.get(1)); - Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(0, milestoningSql.size()); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index 01477779b23..b9fed006365 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -63,7 +63,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -126,7 +126,7 @@ 
public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -167,7 +167,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java index 0f814024864..acec611ab3a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotDateTimeBasedTest.java @@ -64,7 +64,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } 
@Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java index 0ae929f03ed..219547c5ad1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java @@ -61,19 +61,15 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = "UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as 
batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + - "WHERE sink.`batch_id_out` = 999999999"; - Assertions.assertEquals(BigQueryTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(BigQueryTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); - Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(0, milestoningSql.size()); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index cd672e1c725..e6a778a1df7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -61,7 +61,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List 
milestoningSql = operations.ingestSql(); @@ -124,7 +124,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -165,7 +165,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index 79621de87fb..3d2af712a4e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -62,7 +62,7 @@ public void 
verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java index ecd80ab6ced..ac55137410a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java @@ -53,6 +53,7 @@ class UnitemporalSnapshotTest extends BaseTest /* Scenario: Test milestoning Logic without Partition when staging table pre populated + Empty batch handling - default */ @Test void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception @@ -115,6 +116,7 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception /* Scenario: Test milestoning Logic with Partition when staging table pre populated + Empty Batch Handling : Default */ @Test void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java index 18f78b2c47a..90f0a70bade 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java @@ -17,7 +17,10 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; +import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -51,6 +54,7 @@ class UnitemporalSnapshotWithBatchIdTest extends BaseTest /* Scenario: Test milestoning Logic without Partition 
when staging table pre populated + Empty batch handling - DeleteTargetData */ @Test void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception @@ -70,6 +74,7 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception .batchIdInName(batchIdInName) .batchIdOutName(batchIdOutName) .build()) + .emptyDatasetHandling(DeleteTargetData.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -108,6 +113,7 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception /* Scenario: Test milestoning Logic with Partition when staging table pre populated + Empty Batch Handling : DeleteTargetData */ @Test void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception @@ -128,6 +134,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception .batchIdOutName(batchIdOutName) .build()) .addAllPartitionFields(Collections.singletonList(dateName)) + .emptyDatasetHandling(DeleteTargetData.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); @@ -207,14 +214,27 @@ void testUnitemporalSnapshotMilestoningLogicWithPartitionFilter() throws Excepti expectedStats = createExpectedStatsMap(4, 0, 2, 1, 4); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats); - // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) ------------------------ + + // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch - No Op) ------------------------ + IngestMode ingestModeWithNoOpBatchHandling = ingestMode.withEmptyDatasetHandling(NoOp.builder().build()); + String dataPass3 = basePathForInput + "with_partition_filter/staging_data_pass3.csv"; - String expectedDataPass3 = basePathForExpected + "with_partition_filter/expected_pass3.csv"; + String expectedDataPass3 = basePathForExpected + 
"with_partition_filter/expected_pass2.csv"; + // 1. Load Staging table + loadStagingDataForWithPartition(dataPass3); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); + executePlansAndVerifyResults(ingestModeWithNoOpBatchHandling, options, datasets, schema, expectedDataPass3, expectedStats); + + // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch - Delete target Data) ------------------------ + IngestMode ingestModeWithDeleteTargetData = ingestMode.withEmptyDatasetHandling(DeleteTargetData.builder().build()); + dataPass3 = basePathForInput + "with_partition_filter/staging_data_pass3.csv"; + expectedDataPass3 = basePathForExpected + "with_partition_filter/expected_pass3.csv"; // 1. Load Staging table loadStagingDataForWithPartition(dataPass3); // 2. Execute plans and verify results expectedStats = createExpectedStatsMap(0, 0, 0, 0, 3); - executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats); + executePlansAndVerifyResults(ingestModeWithDeleteTargetData, options, datasets, schema, expectedDataPass3, expectedStats); } /* diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java index 2782e71cdab..ed369f1ab2b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java @@ -18,6 +18,8 @@ import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; +import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -50,6 +52,7 @@ class UnitemporalSnapshotWithBatchTimeTest extends BaseTest /* Scenario: Test milestoning Logic without Partition when staging table pre populated + Empty batch handling - NoOp */ @Test void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception @@ -69,6 +72,7 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception .dateTimeInName(batchTimeInName) .dateTimeOutName(batchTimeOutName) .build()) + .emptyDatasetHandling(NoOp.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); @@ -101,12 +105,13 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception // 1. Load Staging table loadBasicStagingData(dataPass3); // 2. 
Execute plans and verify results - expectedStats = createExpectedStatsMap(0, 0, 0, 0, 4); + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_03); } /* Scenario: Test milestoning Logic with Partition when staging table pre populated + Empty Batch Handling : NoOp */ @Test void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception @@ -127,6 +132,7 @@ void testUnitemporalSnapshotMilestoningLogicWithPartition() throws Exception .dateTimeOutName(batchTimeOutName) .build()) .addAllPartitionFields(Collections.singletonList(dateName)) + .emptyDatasetHandling(NoOp.builder().build()) .build(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv index a7b2313f70c..994d408a3a4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_based/with_partition_filter/expected_pass3.csv @@ -1,9 +1,9 @@ 2021-12-01,IBM,116.92,5958300,DIGEST1,1,1 2021-12-01,JPM,161.00,12253400,DIGEST2,1,1 
2021-12-01,GS,383.82,2476000,DIGEST3,1,1 -2021-12-02,IBM,117.37,5267100,DIGEST4,1,2 +2021-12-02,IBM,117.37,5267100,DIGEST4,1,3 2021-12-02,JPMX,159.83,12969900,DIGEST5,1,1 2021-12-02,GS,37800.00,3343700,DIGEST6,1,1 -2021-12-02,JPM,159.83,12969900,DIGEST7,2,2 -2021-12-02,GS,378.00,3343700,DIGEST8,2,2 +2021-12-02,JPM,159.83,12969900,DIGEST7,2,3 +2021-12-02,GS,378.00,3343700,DIGEST8,2,3 2021-12-03,GS,379.00,3343700,DIGEST9,2,999999999 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv index f7ec485cdd0..32412eb20f4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/time_based/without_partition/expected_pass3.csv @@ -1,5 +1,5 @@ -1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,2000-01-03 00:00:00.0 -2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,2000-01-03 00:00:00.0 +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,2000-01-01 00:00:00.0,9999-12-31 23:59:59.0 3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,2000-01-01 00:00:00.0,2000-01-02 00:00:00.0 
-3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST3_UPDATED,2000-01-02 00:00:00.0,2000-01-03 00:00:00.0 -4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.0,2000-01-03 00:00:00.0 \ No newline at end of file +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST3_UPDATED,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2000-01-02 00:00:00.0,9999-12-31 23:59:59.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java index b4aa4d14c24..e1624d0c58d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdBasedTest.java @@ -63,19 +63,15 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); List metadataIngestSql = operations.metadataIngestSql(); - String expectedMilestoneQuery = 
"UPDATE `mydb`.`main` as sink " + - "SET sink.`batch_id_out` = (SELECT COALESCE(MAX(batch_metadata.`table_batch_id`),0)+1 FROM batch_metadata as batch_metadata WHERE UPPER(batch_metadata.`table_name`) = 'MAIN')-1 " + - "WHERE sink.`batch_id_out` = 999999999"; - Assertions.assertEquals(MemsqlTestArtifacts.expectedMainTableBatchIdBasedCreateQuery, preActionsSql.get(0)); Assertions.assertEquals(MemsqlTestArtifacts.expectedMetadataTableCreateQuery, preActionsSql.get(1)); - Assertions.assertEquals(expectedMilestoneQuery, milestoningSql.get(0)); + Assertions.assertEquals(0, milestoningSql.size()); Assertions.assertEquals(getExpectedMetadataTableIngestQuery(), metadataIngestSql.get(0)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java index ed9f18e6c5d..22dfbe78a9f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotBatchIdDateTimeBasedTest.java @@ -63,7 +63,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void 
verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -126,7 +126,7 @@ public void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult o } @Override - public void verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); @@ -168,7 +168,7 @@ public void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorR } @Override - public void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java index 4b6d2a452c7..6dee33f7e13 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotDateTimeBasedTest.java @@ -64,7 +64,7 @@ public void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResul } @Override - public void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations) + public void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling(GeneratorResult operations) { List preActionsSql = operations.preActionsSql(); List milestoningSql = operations.ingestSql(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java index 4c830b72489..2905e53cd59 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; import 
org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import java.util.Arrays; @@ -47,6 +48,7 @@ public TestScenario BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS() .batchIdInName(batchIdInField) .batchIdOutName(batchIdOutField) .build()) + .emptyDatasetHandling(NoOp.builder().build()) .build(); return new TestScenario(mainTableWithBatchIdBasedSchema, stagingTableWithBaseSchemaAndDigest, ingestMode); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java index 179488d578b..3caa986ce69 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; import java.util.Arrays; @@ -49,6 +50,7 @@ public TestScenario 
BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS( .dateTimeInName(batchTimeInField) .dateTimeOutName(batchTimeOutField) .build()) + .emptyDatasetHandling(DeleteTargetData.builder().build()) .build(); return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } @@ -91,6 +93,7 @@ public TestScenario BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLI .build()) .addAllPartitionFields(Arrays.asList(partitionKeys)) .putAllPartitionValuesByField(partitionFilter) + .emptyDatasetHandling(DeleteTargetData.builder().build()) .build(); return new TestScenario(mainTableWithBatchIdAndTime, stagingTableWithBaseSchemaAndDigest, ingestMode); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java index 395bac86ebb..1930004c250 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdBasedTestCases.java @@ -54,7 +54,7 @@ void testUnitemporalSnapshotWithoutPartitionNoDataSplits() public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult 
operations); @Test - void testUnitemporalSnapshotWithoutPartitionForEmptyBatch() + void testUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling() { TestScenario scenario = scenarios.BATCH_ID_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); RelationalGenerator generator = RelationalGenerator.builder() @@ -64,10 +64,10 @@ void testUnitemporalSnapshotWithoutPartitionForEmptyBatch() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); - verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(operations); + verifyUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling(operations); } - public abstract void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithoutPartitionWithNoOpEmptyBatchHandling(GeneratorResult operations); @Test void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java index f14a1da0d72..9f1f046f6fc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java @@ -17,7 +17,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; +import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.relational.CaseConversion; @@ -25,7 +25,6 @@ import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.scenarios.TestScenario; -import org.finos.legend.engine.persistence.components.scenarios.UnitemporalSnapshotBatchIdBasedScenarios; import org.finos.legend.engine.persistence.components.scenarios.UnitemporalSnapshotBatchIdDateTimeBasedScenarios; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -54,7 +53,7 @@ void testUnitemporalSnapshotWithoutPartitionNoDataSplits() public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithoutPartitionForEmptyBatch() + void testUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling() { TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); RelationalGenerator generator 
= RelationalGenerator.builder() @@ -64,10 +63,10 @@ void testUnitemporalSnapshotWithoutPartitionForEmptyBatch() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); - verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(operations); + verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(operations); } - public abstract void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithoutPartitionWithDeleteTargetDataEmptyBatchHandling(GeneratorResult operations); @Test void testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() @@ -103,7 +102,7 @@ void testUnitemporalSnapshotWithPartitionNoDataSplits() public abstract void verifyUnitemporalSnapshotWithPartitionNoDataSplits(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithPartitionForEmptyBatch() + void testUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling() { TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); RelationalGenerator generator = RelationalGenerator.builder() @@ -113,10 +112,36 @@ void testUnitemporalSnapshotWithPartitionForEmptyBatch() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionForEmptyBatch(operations); + verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionForEmptyBatch(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(GeneratorResult operations); + + @Test + void testUnitemporalSnapshotWithPartitionWithNoOpEmptyBatchHandling() + { + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITIONS__NO_DATA_SPLITS(); + UnitemporalSnapshot ingestMode = 
UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .dateTimeInName(batchTimeInField) + .dateTimeOutName(batchTimeOutField) + .build()) + .addAllPartitionFields(Arrays.asList(partitionKeys)) + .emptyDatasetHandling(NoOp.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(ingestMode) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .build(); + GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); + verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(operations); + } @Test void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() @@ -135,7 +160,7 @@ void testUnitemporalSnapshotWithPartitionFiltersNoDataSplits() public abstract void verifyUnitemporalSnapshotWithPartitionFiltersNoDataSplits(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithPartitionFiltersForEmptyBatch() + void testUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHandling() { TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); RelationalGenerator generator = RelationalGenerator.builder() @@ -145,10 +170,37 @@ void testUnitemporalSnapshotWithPartitionFiltersForEmptyBatch() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); - verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(operations); + verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHandling(operations); } - public abstract void verifyUnitemporalSnapshotWithPartitionFiltersForEmptyBatch(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithPartitionFiltersWithDeleteTargetDataEmptyDataHandling(GeneratorResult 
operations); + + @Test + void testUnitemporalSnapshotWithPartitionFiltersWithNoOpEmptyDataHandling() + { + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITH_PARTITION_FILTER__NO_DATA_SPLITS(); + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .dateTimeInName(batchTimeInField) + .dateTimeOutName(batchTimeOutField) + .build()) + .addAllPartitionFields(Arrays.asList(partitionKeys)) + .putAllPartitionValuesByField(partitionFilter) + .emptyDatasetHandling(NoOp.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(ingestMode) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .build(); + GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); + verifyUnitemporalSnapshotWithPartitionWithDefaultEmptyDataHandling(operations); + } @Test void testUnitemporalSnapshotWithCleanStagingData() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java index c0b288d334d..bd79faf2246 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotDateTimeBasedTestCases.java @@ -17,7 +17,6 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.relational.CaseConversion; @@ -53,7 +52,7 @@ void testUnitemporalSnapshotWithoutPartitionNoDataSplits() public abstract void verifyUnitemporalSnapshotWithoutPartitionNoDataSplits(GeneratorResult operations); @Test - void testUnitemporalSnapshotWithoutPartitionForEmptyBatch() + void testUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling() { TestScenario scenario = scenarios.DATETIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); RelationalGenerator generator = RelationalGenerator.builder() @@ -63,10 +62,10 @@ void testUnitemporalSnapshotWithoutPartitionForEmptyBatch() .collectStatistics(true) .build(); GeneratorResult operations = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); - verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(operations); + verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling(operations); } - public abstract void verifyUnitemporalSnapshotWithoutPartitionForEmptyBatch(GeneratorResult operations); + public abstract void verifyUnitemporalSnapshotWithoutPartitionWithDefaultEmptyBatchHandling(GeneratorResult operations); @Test void 
testUnitemporalSnapshotWithoutPartitionWithUpperCaseOptimizer() From 871461b379bd0ccc13fd263fab6107dbcb1926b2 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Thu, 24 Aug 2023 12:24:23 +0800 Subject: [PATCH 16/57] Support for FailEmptyBatch strategy in Unitemporal Snapshot --- .../UnitemporalSnapshotAbstract.java | 4 +-- .../DeleteTargetDataAbstract.java | 4 +-- .../EmptyDatasetHandling.java | 2 +- .../EmptyDatasetHandlingVisitor.java | 6 ++-- .../emptyhandling/FailEmptyBatchAbstract.java | 35 ++++++++++++++++++ .../NoOpAbstract.java | 2 +- .../planner/UnitemporalSnapshotPlanner.java | 15 +++++--- .../UnitemporalSnapshotWithBatchIdTest.java | 4 +-- .../UnitemporalSnapshotWithBatchTimeTest.java | 30 ++++++++++++++-- ...temporalSnapshotBatchIdBasedScenarios.java | 2 +- ...SnapshotBatchIdDateTimeBasedScenarios.java | 2 +- ...SnapshotBatchIdDateTimeBasedTestCases.java | 36 ++++++++++++++++++- 12 files changed, 123 insertions(+), 19 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/{handling => emptyhandling}/DeleteTargetDataAbstract.java (94%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/{handling => emptyhandling}/EmptyDatasetHandling.java (97%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/{handling => emptyhandling}/EmptyDatasetHandlingVisitor.java (81%) create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/FailEmptyBatchAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/{handling => emptyhandling}/NoOpAbstract.java (98%) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java index 94dd1629914..0ed6847395f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/UnitemporalSnapshotAbstract.java @@ -14,8 +14,8 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; -import org.finos.legend.engine.persistence.components.ingestmode.handling.EmptyDatasetHandling; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.DeleteTargetData; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.EmptyDatasetHandling; import 
org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoned; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionMilestoning; import org.immutables.value.Value; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/DeleteTargetDataAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/DeleteTargetDataAbstract.java similarity index 94% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/DeleteTargetDataAbstract.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/DeleteTargetDataAbstract.java index 33433e30af2..0d0974dbea2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/DeleteTargetDataAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/DeleteTargetDataAbstract.java @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package org.finos.legend.engine.persistence.components.ingestmode.handling; +package org.finos.legend.engine.persistence.components.ingestmode.emptyhandling; import static org.immutables.value.Value.Immutable; import static org.immutables.value.Value.Style; @@ -30,6 +30,6 @@ public interface DeleteTargetDataAbstract extends EmptyDatasetHandling @Override default T accept(EmptyDatasetHandlingVisitor visitor) { - return visitor.visitDeleteTargetDataset(this); + return visitor.visitDeleteTargetData(this); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandling.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/EmptyDatasetHandling.java similarity index 97% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandling.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/EmptyDatasetHandling.java index 1123e1e4106..38a0730f157 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandling.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/EmptyDatasetHandling.java @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.ingestmode.handling; +package org.finos.legend.engine.persistence.components.ingestmode.emptyhandling; public interface EmptyDatasetHandling { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandlingVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/EmptyDatasetHandlingVisitor.java similarity index 81% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandlingVisitor.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/EmptyDatasetHandlingVisitor.java index 749e61b12c0..ba610edeb47 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/EmptyDatasetHandlingVisitor.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/EmptyDatasetHandlingVisitor.java @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.ingestmode.handling; +package org.finos.legend.engine.persistence.components.ingestmode.emptyhandling; public interface EmptyDatasetHandlingVisitor { T visitNoOp(NoOpAbstract noOpAbstract); - T visitDeleteTargetDataset(DeleteTargetDataAbstract deleteTargetDataAbstract); + T visitDeleteTargetData(DeleteTargetDataAbstract deleteTargetDataAbstract); + + T visitFailEmptyBatch(FailEmptyBatchAbstract failEmptyBatchAbstract); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/FailEmptyBatchAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/FailEmptyBatchAbstract.java new file mode 100644 index 00000000000..7cf2060e413 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/FailEmptyBatchAbstract.java @@ -0,0 +1,35 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.ingestmode.emptyhandling; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface FailEmptyBatchAbstract extends EmptyDatasetHandling +{ + @Override + default T accept(EmptyDatasetHandlingVisitor visitor) + { + return visitor.visitFailEmptyBatch(this); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/NoOpAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/NoOpAbstract.java similarity index 98% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/NoOpAbstract.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/NoOpAbstract.java index c1c9d358e30..b6dffd3d82b 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/handling/NoOpAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/emptyhandling/NoOpAbstract.java @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.ingestmode.handling; +package org.finos.legend.engine.persistence.components.ingestmode.emptyhandling; import static org.immutables.value.Value.Immutable; import static org.immutables.value.Value.Style; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index 615e0fc9de5..019adbbffc5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -17,9 +17,10 @@ import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.Resources; import 
org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetDataAbstract; -import org.finos.legend.engine.persistence.components.ingestmode.handling.EmptyDatasetHandlingVisitor; -import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOpAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.DeleteTargetDataAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.EmptyDatasetHandlingVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.FailEmptyBatchAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOpAbstract; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; @@ -267,7 +268,7 @@ public LogicalPlan visitNoOp(NoOpAbstract noOpAbstract) } @Override - public LogicalPlan visitDeleteTargetDataset(DeleteTargetDataAbstract deleteTargetDataAbstract) + public LogicalPlan visitDeleteTargetData(DeleteTargetDataAbstract deleteTargetDataAbstract) { List operations = new ArrayList<>(); if (ingestMode().partitioned() && ingestMode().partitionValuesByField().isEmpty()) @@ -277,5 +278,11 @@ public LogicalPlan visitDeleteTargetDataset(DeleteTargetDataAbstract deleteTarge operations.add(sqlToMilestoneAllRows(keyValuePairs)); return LogicalPlan.of(operations); } + + @Override + public LogicalPlan visitFailEmptyBatch(FailEmptyBatchAbstract failEmptyBatchAbstract) + { + throw new RuntimeException("Encountered an Empty Batch, FailEmptyBatch is enabled, so failing the batch!"); + } } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java index 90f0a70bade..f29f4ed594c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchIdTest.java @@ -19,8 +19,8 @@ import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; -import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.DeleteTargetData; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java index ed369f1ab2b..02b22293a2a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotWithBatchTimeTest.java @@ -18,8 +18,8 @@ import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; -import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.FailEmptyBatch; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.TransactionDateTime; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -107,6 +107,32 @@ void 
testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception // 2. Execute plans and verify results expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_03); + + // ------------ Perform unitemporal snapshot milestoning Pass4 (Empty Batch With FailOnEmptyBatchEnabled) ------------------------ + UnitemporalSnapshot ingestModeWithFailOnEmptyBatch = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(TransactionDateTime.builder() + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .emptyDatasetHandling(FailEmptyBatch.builder().build()) + .build(); + + dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; + expectedDataPass3 = basePathForExpected + "without_partition/expected_pass3.csv"; + // 1. Load Staging table + loadBasicStagingData(dataPass3); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); + try + { + executePlansAndVerifyResults(ingestModeWithFailOnEmptyBatch, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_03); + Assertions.fail("Exception was not thrown!"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered an Empty Batch, FailEmptyBatch is enabled, so failing the batch!", e.getMessage()); + } } /* diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java index 2905e53cd59..5162e437250 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdBasedScenarios.java @@ -16,7 +16,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchId; import java.util.Arrays; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java index 3caa986ce69..57419aa4c90 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/scenarios/UnitemporalSnapshotBatchIdDateTimeBasedScenarios.java @@ -16,7 +16,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.handling.DeleteTargetData; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.DeleteTargetData; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; import java.util.Arrays; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java index 9f1f046f6fc..f012b3bec1a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/unitemporal/UnitmemporalSnapshotBatchIdDateTimeBasedTestCases.java @@ -17,7 +17,8 @@ import org.finos.legend.engine.persistence.components.BaseTest; import 
org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; -import org.finos.legend.engine.persistence.components.ingestmode.handling.NoOp; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.FailEmptyBatch; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.relational.CaseConversion; @@ -327,6 +328,39 @@ void testUnitemporalSnapshotPartitionKeysValidation() } } + @Test + void testUnitemporalSnapshotFailOnEmptyBatch() + { + TestScenario scenario = scenarios.BATCH_ID_AND_TIME_BASED__WITHOUT_PARTITIONS__NO_DATA_SPLITS(); + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestField) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInField) + .batchIdOutName(batchIdOutField) + .dateTimeInName(batchTimeInField) + .dateTimeOutName(batchTimeOutField) + .build()) + .emptyDatasetHandling(FailEmptyBatch.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(ingestMode) + .relationalSink(getRelationalSink()) + .executionTimestampClock(fixedClock_2000_01_01) + .collectStatistics(true) + .build(); + + try + { + GeneratorResult queries = generator.generateOperationsForEmptyBatch(scenario.getDatasets()); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered an Empty Batch, FailEmptyBatch is enabled, so failing the batch!", e.getMessage()); + } + } + public abstract RelationalSink getRelationalSink(); } From fd35802076fe4085dce1bd1e9b2581681571f95d Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 28 Aug 
2023 20:00:47 +0800 Subject: [PATCH 17/57] Enrich datasets to add additionalDatasetproperties every where --- .../ingestmode/DeriveMainDatasetSchemaFromStaging.java | 3 ++- .../components/ingestmode/IngestModeCaseConverter.java | 1 + .../persistence/components/executor/Executor.java | 2 +- .../components/executor/RelationalExecutionHelper.java | 2 +- .../components/relational/ansi/AnsiSqlSink.java | 2 +- .../schemaevolution/SchemaEvolutionTest.java | 2 +- .../components/relational/bigquery/BigQuerySink.java | 2 +- .../relational/bigquery/executor/BigQueryExecutor.java | 4 ++-- .../relational/bigquery/executor/BigQueryHelper.java | 5 ++++- .../components/e2e/SchemaEvolutionTest.java | 4 ++-- .../components/relational/RelationalSink.java | 2 +- .../relational/api/RelationalIngestorAbstract.java | 10 +--------- .../relational/executor/RelationalExecutor.java | 4 ++-- .../components/relational/jdbc/JdbcHelper.java | 7 +++++-- .../persistence/components/relational/h2/H2Sink.java | 2 +- .../components/relational/memsql/MemSqlSink.java | 2 +- .../components/relational/snowflake/SnowflakeSink.java | 2 +- 17 files changed, 28 insertions(+), 28 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index 107d84f33a6..6c4840d90e9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -57,7 +57,8 @@ public DeriveMainDatasetSchemaFromStaging(Dataset mainDataset, Dataset stagingDa .name(mainDataset.datasetReference().name().get()) .database(mainDataset.datasetReference().database()) .group(mainDataset.datasetReference().group()) - .alias(mainDataset.datasetReference().alias().orElse(null)); + .alias(mainDataset.datasetReference().alias().orElse(null)) + .datasetAdditionalProperties(mainDataset.datasetAdditionalProperties()); this.mainSchemaDefinitionBuilder = SchemaDefinition.builder() .addAllIndexes(mainDataset.schema().indexes()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java index 6bfba0bd509..8770d654fcd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java @@ -111,6 +111,7 @@ public IngestMode visitUnitemporalSnapshot(UnitemporalSnapshotAbstract unitempor .transactionMilestoning(unitemporalSnapshot.transactionMilestoning().accept(new TransactionMilestoningCaseConverter())) 
.addAllPartitionFields(applyCase(unitemporalSnapshot.partitionFields())) .putAllPartitionValuesByField(applyCase(unitemporalSnapshot.partitionValuesByField())) + .emptyDatasetHandling(unitemporalSnapshot.emptyDatasetHandling()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java index 0548d5f0f12..4219e6227a9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/executor/Executor.java @@ -35,7 +35,7 @@ public interface Executor + (x, y, z) -> { throw new UnsupportedOperationException(); }); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java index 071f5cc6efe..c0ac0883216 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/schemaevolution/SchemaEvolutionTest.java @@ -70,7 +70,7 @@ static class TestSink extends AnsiSqlSink { throw new UnsupportedOperationException(); }, - (v, w, x, y, z) -> + (x, y, z) -> { throw new UnsupportedOperationException(); }); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index 38bc18f26b6..8facedb9946 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -136,7 +136,7 @@ private BigQuerySink() LOGICAL_PLAN_VISITOR_BY_CLASS, (executor, sink, dataset) -> sink.doesTableExist(dataset), (executor, sink, dataset) -> sink.validateDatasetSchema(dataset, new BigQueryDataTypeMapping()), - (executor, sink, tableName, schemaName, databaseName) -> sink.constructDatasetFromDatabase(tableName, schemaName, databaseName, new BigQueryDataTypeToLogicalDataTypeMapping())); + (executor, sink, dataset) -> sink.constructDatasetFromDatabase(dataset, new BigQueryDataTypeToLogicalDataTypeMapping())); } @Override diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java index da3aaecf223..ddddb6a06e7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java @@ -105,9 +105,9 @@ public void validateMainDatasetSchema(Dataset dataset) } @Override - public Dataset constructDatasetFromDatabase(String tableName, String schemaName, String databaseName) + public Dataset constructDatasetFromDatabase(Dataset dataset) { - return bigQuerySink.constructDatasetFromDatabaseFn().execute(this, bigQueryHelper, tableName, schemaName, databaseName); + return bigQuerySink.constructDatasetFromDatabaseFn().execute(this, bigQueryHelper, dataset); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java index 
867b7c7b096..47f3cbb03d7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java @@ -207,8 +207,11 @@ public void validateDatasetSchema(Dataset dataset, TypeMapping typeMapping) validateColumns(userColumns, dbColumns); } - public Dataset constructDatasetFromDatabase(String tableName, String schemaName, String databaseName, TypeMapping typeMapping) + public Dataset constructDatasetFromDatabase(Dataset dataset, TypeMapping typeMapping) { + String tableName = dataset.datasetReference().name().orElseThrow(IllegalStateException::new); + String schemaName = dataset.datasetReference().group().orElse(null); + String databaseName = dataset.datasetReference().database().orElse(null); if (!(typeMapping instanceof JdbcPropertiesToLogicalDataTypeMapping)) { throw new IllegalStateException("Only JdbcPropertiesToLogicalDataTypeMapping allowed in constructDatasetFromDatabase"); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/SchemaEvolutionTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/SchemaEvolutionTest.java index f9078b4266f..5398884f267 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/SchemaEvolutionTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/SchemaEvolutionTest.java @@ -83,7 +83,7 @@ public void testSchemaValidation() throws IOException createTable(relationalExecutor, transformer, dataset); relationalSink.validateMainDatasetSchemaFn().execute(relationalExecutor, bigQueryHelper, dataset); - Dataset datasetConstructedFromDb = relationalSink.constructDatasetFromDatabaseFn().execute(relationalExecutor, bigQueryHelper, tableName, datasetName, projectId); + Dataset datasetConstructedFromDb = relationalSink.constructDatasetFromDatabaseFn().execute(relationalExecutor, bigQueryHelper, dataset); relationalSink.validateMainDatasetSchemaFn().execute(relationalExecutor, bigQueryHelper, datasetConstructedFromDb); Assertions.assertEquals(dataset.withSchema(schemaWithAllColumnsFromDb), datasetConstructedFromDb); } @@ -435,7 +435,7 @@ public void testSchemaEvolution() throws IOException DatasetDefinition datasetDefinitionStage = list.get(stage); DatasetDefinition datasetDefinitionMain = list.get(main); refreshDataset(relationalExecutor, transformer, datasetDefinitionMain, null); - Dataset datasetMain = relationalSink.constructDatasetFromDatabaseFn().execute(relationalExecutor, bigQueryHelper, datasetDefinitionMain.name(), datasetName, projectId); + Dataset datasetMain = relationalSink.constructDatasetFromDatabaseFn().execute(relationalExecutor, bigQueryHelper, datasetDefinitionMain); FieldType typeStage = datasetDefinitionStage.schema().fields().get(0).type(); FieldType typeMain = datasetMain.schema().fields().get(0).type(); DataType dataTypeStage = typeStage.dataType(); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index f7808d4a0fb..91bfe084310 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -185,7 +185,7 @@ public interface ValidateMainDatasetSchema public interface ConstructDatasetFromDatabase { - Dataset execute(Executor executor, RelationalExecutionHelper sink, String tableName, String schemaName, String databaseName); + Dataset execute(Executor executor, RelationalExecutionHelper sink, Dataset dataset); } public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan sqlPlan, Map placeHolderKeyValues); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 3344a247fdc..12f904df358 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -435,7 +435,7 @@ private void init(Datasets datasets) mainDatasetExists = executor.datasetExists(enrichedDatasets.mainDataset()); if (mainDatasetExists && enableSchemaEvolution()) { - enrichedDatasets = enrichedDatasets.withMainDataset(constructDatasetFromDatabase(executor, enrichedDatasets.mainDataset())); + enrichedDatasets = enrichedDatasets.withMainDataset(executor.constructDatasetFromDatabase(enrichedDatasets.mainDataset())); } else { @@ -598,14 +598,6 @@ private boolean datasetEmpty(Dataset dataset, Transformer trans return !value.equals(TABLE_IS_NON_EMPTY); } - private Dataset constructDatasetFromDatabase(Executor executor, Dataset dataset) - { - String tableName = dataset.datasetReference().name().orElseThrow(IllegalStateException::new); - String schemaName = dataset.datasetReference().group().orElse(null); - String databaseName = dataset.datasetReference().database().orElse(null); - return executor.constructDatasetFromDatabase(tableName, schemaName, databaseName); - } - private Map executeStatisticsPhysicalPlan(Executor executor, Map statisticsSqlPlan, Map placeHolderKeyValues) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java index 0c20f9a8067..8a7e014048d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/executor/RelationalExecutor.java @@ -100,9 +100,9 @@ public void validateMainDatasetSchema(Dataset dataset) } @Override - public Dataset constructDatasetFromDatabase(String tableName, String schemaName, String databaseName) + public Dataset constructDatasetFromDatabase(Dataset dataset) { - return relationalSink.constructDatasetFromDatabaseFn().execute(this, relationalExecutionHelper, tableName, schemaName, databaseName); + return relationalSink.constructDatasetFromDatabaseFn().execute(this, relationalExecutionHelper, dataset); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java index 8d907c95a7d..f1136234cbb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/jdbc/JdbcHelper.java @@ -256,8 +256,11 @@ public void validateDatasetSchema(Dataset dataset, TypeMapping typeMapping) } @Override - public Dataset constructDatasetFromDatabase(String tableName, String schemaName, String databaseName, TypeMapping typeMapping) + public Dataset constructDatasetFromDatabase(Dataset dataset, TypeMapping typeMapping) { + String tableName = dataset.datasetReference().name().orElseThrow(IllegalStateException::new); + String schemaName = dataset.datasetReference().group().orElse(null); + String databaseName = dataset.datasetReference().database().orElse(null); try { if (!(typeMapping instanceof JdbcPropertiesToLogicalDataTypeMapping)) @@ -344,7 +347,7 @@ public Dataset constructDatasetFromDatabase(String tableName, String schemaName, } SchemaDefinition schemaDefinition = SchemaDefinition.builder().addAllFields(fields).addAllIndexes(indices).build(); - return DatasetDefinition.builder().name(tableName).database(databaseName).group(schemaName).schema(schemaDefinition).build(); + return DatasetDefinition.builder().name(tableName).database(databaseName).group(schemaName).schema(schemaDefinition).datasetAdditionalProperties(dataset.datasetAdditionalProperties()).build(); } catch (SQLException e) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 76f7deb7726..38dee24b765 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -140,7 +140,7 @@ private H2Sink() LOGICAL_PLAN_VISITOR_BY_CLASS, (executor, sink, dataset) -> sink.doesTableExist(dataset), (executor, sink, dataset) -> sink.validateDatasetSchema(dataset, new H2DataTypeMapping()), - (executor, sink, tableName, schemaName, databaseName) -> sink.constructDatasetFromDatabase(tableName, schemaName, databaseName, new H2JdbcPropertiesToLogicalDataTypeMapping())); + (executor, sink, dataset) -> sink.constructDatasetFromDatabase(dataset, new H2JdbcPropertiesToLogicalDataTypeMapping())); } @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/MemSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/MemSqlSink.java index dc1085f9aba..697ddba8f6e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/MemSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-memsql/src/main/java/org/finos/legend/engine/persistence/components/relational/memsql/MemSqlSink.java @@ -156,7 +156,7 @@ private MemSqlSink() LOGICAL_PLAN_VISITOR_BY_CLASS, (executor, sink, 
dataset) -> sink.doesTableExist(dataset), VALIDATE_MAIN_DATASET_SCHEMA, - (v, w, x, y, z) -> + (x, y, z) -> { throw new UnsupportedOperationException(); }); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 6cae48022f0..7f62dda47ce 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -188,7 +188,7 @@ private SnowflakeSink() return results.size() > 0; }, (executor, sink, dataset) -> sink.validateDatasetSchema(dataset, new SnowflakeDataTypeMapping()), - (executor, sink, tableName, schemaName, databaseName) -> sink.constructDatasetFromDatabase(tableName, schemaName, databaseName, new SnowflakeJdbcPropertiesToLogicalDataTypeMapping())); + (executor, sink, dataset) -> sink.constructDatasetFromDatabase(dataset, new SnowflakeJdbcPropertiesToLogicalDataTypeMapping())); } @Override From 6bd3eef22afe5ba151f7b25e811c9c37ab265594 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 28 Aug 2023 20:54:03 +0800 Subject: [PATCH 18/57] Add tests for Empty Data handling --- .../unitemporal/UnitemporalSnapshotTest.java | 127 ++++++++++++++++-- .../without_partition/expected_pass4.csv | 5 + 2 files changed, 118 insertions(+), 14 
deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass4.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java index ac55137410a..92126d34a4a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/unitemporal/UnitemporalSnapshotTest.java @@ -18,6 +18,8 @@ import org.finos.legend.engine.persistence.components.TestUtils; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshot; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.FailEmptyBatch; +import org.finos.legend.engine.persistence.components.ingestmode.emptyhandling.NoOp; import org.finos.legend.engine.persistence.components.ingestmode.transactionmilestoning.BatchIdAndDateTime; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ 
-31,20 +33,7 @@ import java.util.List; import java.util.Map; -import static org.finos.legend.engine.persistence.components.TestUtils.batchIdInName; -import static org.finos.legend.engine.persistence.components.TestUtils.batchIdOutName; -import static org.finos.legend.engine.persistence.components.TestUtils.batchTimeInName; -import static org.finos.legend.engine.persistence.components.TestUtils.batchTimeOutName; -import static org.finos.legend.engine.persistence.components.TestUtils.priceName; -import static org.finos.legend.engine.persistence.components.TestUtils.dateName; -import static org.finos.legend.engine.persistence.components.TestUtils.digestName; -import static org.finos.legend.engine.persistence.components.TestUtils.expiryDateName; -import static org.finos.legend.engine.persistence.components.TestUtils.idName; -import static org.finos.legend.engine.persistence.components.TestUtils.incomeName; -import static org.finos.legend.engine.persistence.components.TestUtils.nameName; -import static org.finos.legend.engine.persistence.components.TestUtils.startTimeName; -import static org.finos.legend.engine.persistence.components.TestUtils.entityName; -import static org.finos.legend.engine.persistence.components.TestUtils.volumeName; +import static org.finos.legend.engine.persistence.components.TestUtils.*; class UnitemporalSnapshotTest extends BaseTest { @@ -113,6 +102,116 @@ void testUnitemporalSnapshotMilestoningLogicWithoutPartition() throws Exception executePlansAndVerifyResults(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); } + @Test + void testUnitemporalSnapshotMilestoningLogicWithoutPartitionWithCaseConversion() throws Exception + { + DatasetDefinition mainTable = TestUtils.getDefaultMainTable(); + DatasetDefinition stagingTable = TestUtils.getBasicStagingTable(); + + String[] schema = new String[]{idName.toUpperCase(), nameName.toUpperCase(), incomeName.toUpperCase(), startTimeName.toUpperCase(), 
expiryDateName.toUpperCase(), digestName.toUpperCase(), batchIdInName.toUpperCase(), batchIdOutName.toUpperCase()}; + + // Create staging table + h2Sink.executeStatement("CREATE TABLE IF NOT EXISTS \"TEST\".\"STAGING\"(\"ID\" INTEGER NOT NULL,\"NAME\" VARCHAR(64) NOT NULL,\"INCOME\" BIGINT,\"START_TIME\" TIMESTAMP NOT NULL,\"EXPIRY_DATE\" DATE,\"DIGEST\" VARCHAR,PRIMARY KEY (\"ID\", \"START_TIME\"))"); + + UnitemporalSnapshot ingestMode = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .build(); + + PlannerOptions options = PlannerOptions.builder().cleanupStagingData(false).collectStatistics(true).build(); + Datasets datasets = Datasets.of(mainTable, stagingTable); + + // ------------ Perform unitemporal snapshot milestoning Pass1 ------------------------ + String dataPass1 = basePathForInput + "without_partition/staging_data_pass1.csv"; + String expectedDataPass1 = basePathForExpected + "without_partition/expected_pass1.csv"; + // 1. Load staging table + loadBasicStagingDataInUpperCase(dataPass1); + // 2. Execute plans and verify results + Map expectedStats = createExpectedStatsMap(3, 0, 3, 0, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass1, expectedStats, fixedClock_2000_01_01); + // 3. Assert that the staging table is NOT truncated + List> stagingTableList = h2Sink.executeQuery("select * from \"TEST\".\"STAGING\""); + Assertions.assertEquals(stagingTableList.size(), 3); + + // ------------ Perform unitemporal snapshot milestoning Pass2 ------------------------ + String dataPass2 = basePathForInput + "without_partition/staging_data_pass2.csv"; + String expectedDataPass2 = basePathForExpected + "without_partition/expected_pass2.csv"; + // 1. 
Load staging table + loadBasicStagingDataInUpperCase(dataPass2); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(4, 0, 1, 1, 0); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass2, expectedStats, fixedClock_2000_01_01); + + // ------------ Perform unitemporal snapshot milestoning Pass3 (Empty Batch) Empty Data Handling = Fail ------------------------ + UnitemporalSnapshot ingestModeWithFailOnEmptyBatchStrategy = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .emptyDatasetHandling(FailEmptyBatch.builder().build()) + .build(); + + options = options.withCleanupStagingData(true); + + String dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; + String expectedDataPass3 = basePathForExpected + "without_partition/expected_pass3.csv"; + // 1. Load Staging table + loadBasicStagingDataInUpperCase(dataPass3); + // 2. 
Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 4); + try + { + executePlansAndVerifyForCaseConversion(ingestModeWithFailOnEmptyBatchStrategy, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + Assertions.fail("Exception should be thrown"); + } + catch (Exception e) + { + Assertions.assertEquals("Encountered an Empty Batch, FailEmptyBatch is enabled, so failing the batch!", e.getMessage()); + } + + // ------------ Perform unitemporal snapshot milestoning Pass5 (Empty Batch) Empty Data Handling = Skip ------------------------ + UnitemporalSnapshot ingestModeWithSkipEmptyBatchStrategy = UnitemporalSnapshot.builder() + .digestField(digestName) + .transactionMilestoning(BatchIdAndDateTime.builder() + .batchIdInName(batchIdInName) + .batchIdOutName(batchIdOutName) + .dateTimeInName(batchTimeInName) + .dateTimeOutName(batchTimeOutName) + .build()) + .emptyDatasetHandling(NoOp.builder().build()) + .build(); + + options = options.withCleanupStagingData(true); + + dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; + expectedDataPass3 = basePathForExpected + "without_partition/expected_pass2.csv"; + // 1. Load Staging table + loadBasicStagingDataInUpperCase(dataPass3); + // 2. Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 0); + executePlansAndVerifyForCaseConversion(ingestModeWithSkipEmptyBatchStrategy, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + + + // ------------ Perform unitemporal snapshot milestoning Pass6 (Empty Batch) Empty Data Handling = default (none configured on ingestMode) ------------------------ + options = options.withCleanupStagingData(true); + + dataPass3 = basePathForInput + "without_partition/staging_data_pass3.csv"; + expectedDataPass3 = basePathForExpected + "without_partition/expected_pass4.csv"; + // 1. Load Staging table + loadBasicStagingDataInUpperCase(dataPass3); + // 2.
Execute plans and verify results + expectedStats = createExpectedStatsMap(0, 0, 0, 0, 4); + executePlansAndVerifyForCaseConversion(ingestMode, options, datasets, schema, expectedDataPass3, expectedStats, fixedClock_2000_01_01); + } + /* Scenario: Test milestoning Logic with Partition when staging table pre populated diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass4.csv new file mode 100644 index 00000000000..0bd04d8f4bb --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/unitemporal-snapshot-milestoning/expected/batch_id_and_time_based/without_partition/expected_pass4.csv @@ -0,0 +1,5 @@ +1,HARRY,1000,2020-01-01 00:00:00.0,2022-12-01,DIGEST1,1,3,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +2,ROBERT,2000,2020-01-02 00:00:00.0,2022-12-02,DIGEST2,1,3,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +3,ANDY,3000,2020-01-03 00:00:00.0,2022-12-03,DIGEST3,1,1,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +3,ANDY,3100,2020-01-03 00:00:00.0,2022-12-06,DIGEST3_UPDATED,2,3,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 +4,MATT,6000,2020-01-06 00:00:00.0,2022-12-06,DIGEST4,2,3,2000-01-01 00:00:00.0,2000-01-01 00:00:00.0 \ No newline at end of file From a0bccd49f03d4d8a52643d85cc3219b696c71c6e Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Mon, 28 Aug 2023 22:57:22 +0800 Subject: [PATCH 19/57] Support ICEBERG_TABLE_2022 for Iceberg tables --- .../sqldom/schemaops/statements/CreateTable.java | 8 ++++++++ 
.../logicalplan/operations/CreateTableTest.java | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java index a77d0ca215c..7f34cf5a513 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CreateTable.java @@ -46,6 +46,8 @@ public class CreateTable implements DDLStatement private final List clusterKeys; private Map tags; + private static final String ICEBERG_CATALOG_INTEGRATION_SUFFIX = "ICEBERG_TABLE_2022 = true"; + public CreateTable() { this.modifiers = new ArrayList<>(); @@ -118,6 +120,12 @@ public void genSql(StringBuilder builder) throws SqlDomException } builder.append(CLOSING_PARENTHESIS); } + + // Iceberg unified Catalog support + if (types.stream().anyMatch(tableType -> tableType instanceof IcebergTableType)) + { + builder.append(WHITE_SPACE + ICEBERG_CATALOG_INTEGRATION_SUFFIX); + } } diff --git
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java index b69974e5031..c8a96a8745b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/logicalplan/operations/CreateTableTest.java @@ -193,7 +193,7 @@ public void testCreateIcebergTable() "\"col_string\" VARCHAR,\"col_timestamp\" TIMESTAMP,\"col_datetime\" DATETIME,\"col_date\" DATE," + "\"col_real\" DOUBLE,\"col_float\" DOUBLE,\"col_decimal\" NUMBER(10,4),\"col_double\" DOUBLE," + "\"col_binary\" BINARY,\"col_time\" TIME,\"col_numeric\" NUMBER(38,0),\"col_boolean\" BOOLEAN," + - "\"col_varbinary\" BINARY(10))"; + "\"col_varbinary\" BINARY(10)) ICEBERG_TABLE_2022 = true"; Assertions.assertEquals(expected, list.get(0)); } From 16aece89bdddae505b81caa3406eb0f97759c07c Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 29 Aug 2023 11:51:28 +0800 Subject: [PATCH 20/57] Implement bulk load for h2 --- .../values/StagedFilesFieldValueAbstract.java | 4 +- .../components/util/LogicalPlanUtils.java | 2 +- .../ansi/sql/visitors/SelectionVisitor.java | 6 - .../relational/sqldom/common/Clause.java | 3 +- .../pom.xml | 7 ++ .../components/relational/h2/H2Sink.java | 15 +++ ...2StagedFilesDatasetPropertiesAbstract.java | 40 ++++++ 
.../h2/sql/visitor/CopyVisitor.java | 42 +++++++ .../StagedFilesDatasetReferenceVisitor.java | 39 ++++++ .../visitor/StagedFilesDatasetVisitor.java | 40 ++++++ .../visitor/StagedFilesFieldValueVisitor.java | 42 +++++++ .../visitor/StagedFilesSelectionVisitor.java | 44 +++++++ .../schemaops/statements/CopyStatement.java | 117 ++++++++++++++++++ .../schemaops/values/StagedFilesField.java | 75 +++++++++++ .../visitor/StagedFilesDatasetVisitor.java | 15 ++- .../visitor/StagedFilesFieldValueVisitor.java | 2 +- .../schemaops/statements/CopyStatement.java | 2 +- 17 files changed, 475 insertions(+), 20 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/CopyVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/StagedFilesField.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/StagedFilesFieldValueAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/StagedFilesFieldValueAbstract.java index 96434fa77dd..02992f421cf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/StagedFilesFieldValueAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/StagedFilesFieldValueAbstract.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.logicalplan.values; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; +import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import java.util.Optional; @@ -35,5 +35,5 @@ public interface StagedFilesFieldValueAbstract extends FieldValueAbstract Optional elementPath(); - DataType dataType(); + FieldType fieldType(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index 074ddf70e26..60c36200d08 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -394,7 +394,7 @@ public static List extractStagedFilesFieldValues(Dataset dataset) .datasetRefAlias(dataset.datasetReference().alias()) .alias(field.fieldAlias().isPresent() ? 
field.fieldAlias().get() : field.name()) .elementPath(field.elementPath()) - .dataType(field.type().dataType()) + .fieldType(field.type()) .fieldName(field.name()) .build(); stagedFilesFields.add(fieldValue); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java index 5dd59090b04..81934718367 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/SelectionVisitor.java @@ -20,7 +20,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DerivedDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.SelectStatement; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; @@ -60,11 +59,6 @@ select id from (select * from table where condition) conditions.add(filterCondition); 
logicalPlanNodeList.add(derivedDataset.datasetReference()); } - else if (dataset instanceof StagedFilesDataset) - { - StagedFilesDataset stagedFilesDataset = (StagedFilesDataset) dataset; - logicalPlanNodeList.add(stagedFilesDataset.datasetReference()); - } else { logicalPlanNodeList.add(dataset); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java index 4fb85be11dd..a69e1672d9c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java @@ -56,7 +56,8 @@ public enum Clause ORDER_BY("ORDER BY"), CLUSTER_BY("CLUSTER BY"), NOT_ENFORCED("NOT ENFORCED"), - DATA_TYPE("DATA TYPE"); + DATA_TYPE("DATA TYPE"), + CONVERT("CONVERT"); private final String clause; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml index dc84c4c021a..8c42fb7fadf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml @@ -43,6 +43,13 @@ org.finos.legend.engine legend-engine-xt-persistence-component-relational-ansi + + + + org.immutables + value + + com.h2database diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 76f7deb7726..925bba46347 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -21,9 +21,14 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.LoadCsv; import 
org.finos.legend.engine.persistence.components.logicalplan.values.HashFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; +import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -33,12 +38,17 @@ import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2JdbcPropertiesToLogicalDataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.CopyVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.CsvExternalDatasetReferenceVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.HashFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.LoadCsvVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.SchemaDefinitionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ParseJsonFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.FieldVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesDatasetReferenceVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesDatasetVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesFieldValueVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesSelectionVisitor; import 
org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.util.Capability; @@ -86,6 +96,11 @@ public class H2Sink extends AnsiSqlSink logicalPlanVisitorByClass.put(LoadCsv.class, new LoadCsvVisitor()); logicalPlanVisitorByClass.put(CsvExternalDatasetReference.class, new CsvExternalDatasetReferenceVisitor()); logicalPlanVisitorByClass.put(Field.class, new FieldVisitor()); + logicalPlanVisitorByClass.put(Copy.class, new CopyVisitor()); + logicalPlanVisitorByClass.put(StagedFilesDataset.class, new StagedFilesDatasetVisitor()); + logicalPlanVisitorByClass.put(StagedFilesSelection.class, new StagedFilesSelectionVisitor()); + logicalPlanVisitorByClass.put(StagedFilesDatasetReference.class, new StagedFilesDatasetReferenceVisitor()); + logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); Map> implicitDataTypeMapping = new HashMap<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java new file mode 100644 index 00000000000..c3651179add --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java @@ -0,0 +1,40 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +package org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; +import org.immutables.value.Value; + + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface H2StagedFilesDatasetPropertiesAbstract extends StagedFilesDatasetProperties +{ + @Value.Check + default void validate() + { + if (files().size() != 1) + { + throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only 1 file per load supported"); + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/CopyVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/CopyVisitor.java new file mode 100644 index 00000000000..9f966cdff6a --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/CopyVisitor.java @@ -0,0 +1,42 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.statements.CopyStatement; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class CopyVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, Copy current, VisitorContext context) + { + CopyStatement copyStatement = new CopyStatement(); + prev.push(copyStatement); + + List logicalPlanNodes = new ArrayList<>(); + logicalPlanNodes.add(current.sourceDataset()); + logicalPlanNodes.add(current.targetDataset()); + logicalPlanNodes.addAll(current.fields()); + return new VisitorResult(copyStatement, logicalPlanNodes); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java new file mode 100644 index 00000000000..2751f8fbffd --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -0,0 +1,39 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.expresssions.table.CsvRead; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + + +public class StagedFilesDatasetReferenceVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference current, VisitorContext context) + { + if (!(current.properties() instanceof H2StagedFilesDatasetProperties)) + { + throw new IllegalStateException("Only H2StagedFilesDatasetProperties are supported for H2 Sink"); + } + H2StagedFilesDatasetProperties datasetProperties = 
(H2StagedFilesDatasetProperties) current.properties(); + CsvRead csvRead = new CsvRead(datasetProperties.files().get(0), String.join(",", current.columns()), "NULL"); + prev.push(csvRead); + return new VisitorResult(null); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java new file mode 100644 index 00000000000..068721db03f --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetVisitor.java @@ -0,0 +1,40 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; +import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; + +import java.util.List; + +public class StagedFilesDatasetVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDataset current, VisitorContext context) + { + List allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); + StagedFilesSelection selection = StagedFilesSelection.builder() + .source(current) + .addAllFields(allColumns) + .alias(current.datasetReference().alias()) + .build(); + return new StagedFilesSelectionVisitor().visit(prev, selection, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java new file mode 100644 index 00000000000..ad8fbd0cce8 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java @@ -0,0 +1,42 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.StagedFilesField; +import org.finos.legend.engine.persistence.components.relational.sql.DataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class StagedFilesFieldValueVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesFieldValue current, VisitorContext context) + { + DataType dataType = getDataTypeMapping().getDataType(current.fieldType()); + StagedFilesField field = new 
StagedFilesField(context.quoteIdentifier(), current.fieldName(), dataType); + prev.push(field); + return new VisitorResult(null); + } + + public DataTypeMapping getDataTypeMapping() + { + return new H2DataTypeMapping(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java new file mode 100644 index 00000000000..d8c789e9bfc --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java @@ -0,0 +1,44 @@ +// Copyright 2022 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.SelectStatement; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class StagedFilesSelectionVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesSelection current, VisitorContext context) + { + SelectStatement selectStatement = new SelectStatement(); + current.alias().ifPresent(selectStatement::setAlias); + prev.push(selectStatement); + + List logicalPlanNodeList = new ArrayList<>(); + logicalPlanNodeList.add(current.source().datasetReference()); + logicalPlanNodeList.addAll(current.fields()); + selectStatement.setSelectItemsSize((long) current.fields().size()); + + return new VisitorResult(selectStatement, logicalPlanNodeList); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java new file mode 100644 index 00000000000..9849a55e18a --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java @@ -0,0 +1,117 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.statements; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DMLStatement; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.SelectStatement; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Field; + +import java.util.ArrayList; +import java.util.List; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause.INSERT_INTO; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.COMMA; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; +import static 
org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class CopyStatement implements DMLStatement +{ + private Table table; + private final List columns; + private SelectStatement selectStatement; + + public CopyStatement() + { + columns = new ArrayList<>(); + } + + public CopyStatement(Table table, List columns, SelectStatement selectStatement) + { + this.table = table; + this.columns = columns; + this.selectStatement = selectStatement; + } + + /* + Copy GENERIC PLAN for H2: + INSERT INTO table_name (COLUMN_LIST) + SELECT [CONVERT("column_name", column_type) , ...] + FROM CSVREAD('{FILE_PATH}','{CSV_COLUMN_NAMES}',NULL) + */ + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + validate(); + builder.append(INSERT_INTO.get()); + builder.append(WHITE_SPACE); + + // Add table name + table.genSqlWithoutAlias(builder); + builder.append(WHITE_SPACE); + + // Add column names + if (columns != null && columns.size() > 0) + { + builder.append(OPEN_PARENTHESIS); + for (int i = 0; i < columns.size(); i++) + { + columns.get(i).genSqlWithNameOnly(builder); + if (i < (columns.size() - 1)) + { + builder.append(COMMA + WHITE_SPACE); + } + } + builder.append(CLOSING_PARENTHESIS); + } + + builder.append(WHITE_SPACE); + + selectStatement.genSql(builder); + } + + @Override + public void push(Object node) + { + if (node instanceof Table) + { + table = (Table) node; + } + else if (node instanceof Field) + { + columns.add((Field) node); + } + else if (node instanceof SelectStatement) + { + selectStatement = (SelectStatement) node; + } + } + + void validate() throws SqlDomException + { + if (selectStatement == null) + { + throw new SqlDomException("selectStatement is mandatory for Copy Table Command"); + } + + if (table == null) + { + throw new SqlDomException("table is mandatory for Copy Table Command"); + } + } +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/StagedFilesField.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/StagedFilesField.java new file mode 100644 index 00000000000..ae5fcf9c1cb --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/StagedFilesField.java @@ -0,0 +1,75 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; +import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause.CONVERT; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.COMMA; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; + +public class StagedFilesField extends Value +{ + private String columnName; + + private DataType dataType; + + public StagedFilesField(String quoteIdentifier, String columnName, DataType datatype) + { + super(quoteIdentifier); + this.columnName = columnName; + this.dataType = datatype; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + validate(); + builder.append(CONVERT); + builder.append(OPEN_PARENTHESIS); + builder.append(SqlGenUtils.getQuotedField(columnName, getQuoteIdentifier())); + builder.append(COMMA); + dataType.genSql(builder); + builder.append(CLOSING_PARENTHESIS); + } + + @Override + public void push(Object node) + { + } + + void validate() throws SqlDomException + { + if (columnName == null) + { + throw new SqlDomException("columnName is empty"); + } + if (dataType == null) + { + throw new SqlDomException("dataType is empty"); + } + } +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java index d212690a061..c22d0b6ed44 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesDatasetVisitor.java @@ -14,11 +14,10 @@ package org.finos.legend.engine.persistence.components.relational.snowflake.sql.visitor; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; -import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.SelectionVisitor; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; @@ -31,11 +30,11 @@ public 
class StagedFilesDatasetVisitor implements LogicalPlanVisitor allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); - Selection selection = Selection.builder() - .source(current.datasetReference()) - .addAllFields(allColumns) - .alias(current.datasetReference().alias()) - .build(); - return new SelectionVisitor().visit(prev, selection, context); + StagedFilesSelection selection = StagedFilesSelection.builder() + .source(current) + .addAllFields(allColumns) + .alias(current.datasetReference().alias()) + .build(); + return new StagedFilesSelectionVisitor().visit(prev, selection, context); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesFieldValueVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesFieldValueVisitor.java index 43761abf05d..89200d7882d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesFieldValueVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesFieldValueVisitor.java @@ -36,7 +36,7 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesFieldValue current, current.alias().ifPresent(stageField::setAlias); current.datasetRefAlias().ifPresent(stageField::setDatasetReferenceAlias); - if (current.dataType().equals(DataType.VARIANT) || 
current.dataType().equals(DataType.JSON)) + if (current.fieldType().dataType().equals(DataType.VARIANT) || current.fieldType().dataType().equals(DataType.JSON)) { Function parseJson = new Function(FunctionName.fromName("PARSE_JSON"), Arrays.asList(stageField), null, context.quoteIdentifier()); Function toVariant = new Function(FunctionName.fromName("TO_VARIANT"), Arrays.asList(parseJson), current.alias().orElse(null), context.quoteIdentifier()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java index 638ac41e948..04480fb8659 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/statements/CopyStatement.java @@ -113,7 +113,7 @@ void validate() throws SqlDomException { if (selectStatement == null) { - throw new SqlDomException("selectStatement is mandatory for Select Statement"); + throw new SqlDomException("selectStatement is mandatory for Copy Table Command"); } if (table == null) From 738f9c820db2bcab0743d45127d2d21cb47b2fa6 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 29 Aug 2023 12:06:21 +0800 Subject: [PATCH 21/57] Add missing 
datasetAdditionalProperties in BigQueryHelper --- .../components/relational/bigquery/executor/BigQueryHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java index 47f3cbb03d7..84c5ed5f186 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java @@ -250,7 +250,7 @@ public Dataset constructDatasetFromDatabase(Dataset dataset, TypeMapping typeMap SchemaDefinition schemaDefinition = SchemaDefinition.builder() .addAllFields(fields) .build(); - return DatasetDefinition.builder().name(tableName).database(databaseName).group(schemaName).schema(schemaDefinition).build(); + return DatasetDefinition.builder().name(tableName).database(databaseName).group(schemaName).schema(schemaDefinition).datasetAdditionalProperties(dataset.datasetAdditionalProperties()).build(); } private List fetchPrimaryKeys(String tableName, String schemaName, String databaseName) From e82af33badcb41d5cfeb493be12d6248a6ff082d Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 29 Aug 2023 17:01:54 +0800 Subject: [PATCH 22/57] Implement executor flow and add end-to-end test --- 
.../relational/ansi/AnsiSqlSink.java | 3 +- .../components/relational/RelationalSink.java | 3 +- .../api/RelationalIngestorAbstract.java | 2 +- .../components/relational/h2/H2Sink.java | 44 ++++ .../StagedFilesDatasetReferenceVisitor.java | 2 +- .../ingestmode/bulkload/BulkLoadTest.java | 222 ++++++++++++++++++ .../data/bulk-load/data/staged_file1.csv | 3 + .../relational/snowflake/SnowflakeSink.java | 4 +- .../components/ingestmode/BulkLoadTest.java | 6 +- 9 files changed, 280 insertions(+), 9 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/data/staged_file1.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 166cce7ceba..d52ea0c5c10 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.ansi; import 
org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; @@ -312,7 +313,7 @@ private static Map, LogicalPlanVisitor> rightBiasedUnion(Map executor, SqlPlan sqlPlan, Map placeHolderKeyValues) + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { throw new UnsupportedOperationException("Bulk Load not supported!"); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java index 91bfe084310..d58f1472853 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/RelationalSink.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; import 
org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; @@ -188,5 +189,5 @@ public interface ConstructDatasetFromDatabase Dataset execute(Executor executor, RelationalExecutionHelper sink, Dataset dataset); } - public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan sqlPlan, Map placeHolderKeyValues); + public abstract IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 12f904df358..5f6f605a0ef 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -527,7 +527,7 @@ private List performBulkLoad(Datasets datasets, Transformer placeHolderKeyValues = extractPlaceHolderKeyValues(datasets, executor, planner, transformer, ingestMode, Optional.empty()); // Execute ingest SqlPlan - IngestorResult result = relationalSink().performBulkLoad(datasets, executor, generatorResult.ingestSqlPlan(), placeHolderKeyValues); + IngestorResult result = 
relationalSink().performBulkLoad(datasets, executor, generatorResult.ingestSqlPlan(), generatorResult.postIngestStatisticsSqlPlan(), placeHolderKeyValues); // Execute metadata ingest SqlPlan if (generatorResult.metadataIngestSqlPlan().isPresent()) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 32d9fbd8391..20026025ab9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -14,8 +14,11 @@ package org.finos.legend.engine.persistence.components.relational.h2; +import java.util.List; import java.util.Optional; +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; @@ -36,6 +39,8 @@ import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.LowerCaseOptimizer; import 
org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; +import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; +import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2JdbcPropertiesToLogicalDataTypeMapping; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.CopyVisitor; @@ -69,6 +74,8 @@ import java.util.Map; import java.util.Set; +import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; + public class H2Sink extends AnsiSqlSink { private static final RelationalSink INSTANCE; @@ -187,4 +194,41 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers throw new IllegalArgumentException("Unrecognized case conversion: " + caseConversion); } } + + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) + { + executor.executePhysicalPlan(ingestSqlPlan, placeHolderKeyValues); + + long incomingRecordCount = 0; + long rowsInserted = 0; + + SqlPlan incomingRecordCountSqlPlan = statisticsSqlPlan.get(StatisticName.INCOMING_RECORD_COUNT); + SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); + + List incomingRecordCountResults = executor.executePhysicalPlanAndGetResults(incomingRecordCountSqlPlan, placeHolderKeyValues); + List rowsInsertedResults = executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues); + + for (Map row: incomingRecordCountResults.get(0).getData()) + { + incomingRecordCount += (Long) row.get(StatisticName.INCOMING_RECORD_COUNT.get()); + } + for (Map row: rowsInsertedResults.get(0).getData()) + { + rowsInserted += (Long) 
row.get(StatisticName.ROWS_INSERTED.get()); + } + + IngestorResult result; + Map stats = new HashMap<>(); + stats.put(StatisticName.ROWS_INSERTED, rowsInserted); + stats.put(StatisticName.INCOMING_RECORD_COUNT, incomingRecordCount); + stats.put(StatisticName.FILES_LOADED, 1); + result = IngestorResult.builder() + .status(IngestStatus.SUCCEEDED) + .updatedDatasets(datasets) + .putAllStatisticByName(stats) + .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) + .build(); + + return result; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java index 2751f8fbffd..b697e4140ea 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -32,7 +32,7 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu throw new IllegalStateException("Only H2StagedFilesDatasetProperties are supported for H2 Sink"); } H2StagedFilesDatasetProperties datasetProperties = (H2StagedFilesDatasetProperties) current.properties(); - CsvRead csvRead = new CsvRead(datasetProperties.files().get(0), String.join(",", current.columns()), "NULL"); + 
CsvRead csvRead = new CsvRead(datasetProperties.files().get(0), String.join(",", current.columns()), null); prev.push(csvRead); return new VisitorResult(null); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java new file mode 100644 index 00000000000..6c1f864cb95 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -0,0 +1,222 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.bulkload; + +import org.finos.legend.engine.persistence.components.BaseTest; +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.StatisticName; +import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.planner.PlannerOptions; +import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; +import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; +import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.time.Clock; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static org.finos.legend.engine.persistence.components.TestUtils.mainTableName; +import static 
org.finos.legend.engine.persistence.components.TestUtils.testDatabaseName; +import static org.finos.legend.engine.persistence.components.TestUtils.testSchemaName; +import static org.finos.legend.engine.persistence.components.common.StatisticName.INCOMING_RECORD_COUNT; +import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; + +public class BulkLoadTest extends BaseTest +{ + private static final String APPEND_TIME = "append_time"; + private static final String DIGEST = "digest"; + private static final String col_int = "col_int"; + private static final String col_string = "col_string"; + private static final String col_decimal = "col_decimal"; + private static final String col_datetime = "col_datetime"; + + private static Field col1 = Field.builder() + .name(col_int) + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .primaryKey(true) + .build(); + private static Field col2 = Field.builder() + .name(col_string) + .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) + .build(); + private static Field col3 = Field.builder() + .name(col_decimal) + .type(FieldType.of(DataType.DECIMAL, 5, 2)) + .build(); + private static Field col4 = Field.builder() + .name(col_datetime) + .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) + .build(); + + protected final ZonedDateTime fixedZonedDateTime_2000_01_01 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); + protected final Clock fixedClock_2000_01_01 = Clock.fixed(fixedZonedDateTime_2000_01_01.toInstant(), ZoneOffset.UTC); + + @Test + public void testBulkLoadWithDigestNotGenerated() throws Exception + { + String filePath = "src/test/resources/data/bulk-load/data/staged_file1.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .digestField(DIGEST) + .generateDigest(false) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = 
StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .addAllFiles(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"append_time\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"append_time\") " + + "SELECT " + + "CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP),'2000-01-01 00:00:00' " + + "FROM CSVREAD('src/test/resources/data/bulk-load/data/staged_file1.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + + Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias 
WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime}; + + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); + expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + + executePlansAndVerifyResults(bulkLoad, options, datasets, schema, filePath, expectedStats, fixedClock_2000_01_01); + } + + @Test + public void testBulkLoadDigestColumnNotProvided() + { + try + { + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(true) + .digestUdfName("LAKEHOUSE_UDF") + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("For digest generation, digestField & digestUdfName are mandatory")); + } + } + + @Test + public void testBulkLoadDigestUDFNotProvided() + { + try + { + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(true) + .digestField("digest") + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("For digest generation, digestField & digestUdfName are mandatory")); + } + } + + @Test + public void testBulkLoadStagedFilesDatasetNotProvided() + { + try + { + BulkLoad bulkLoad = BulkLoad.builder() + .digestField("digest") + .generateDigest(false) + 
.auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagingDataset = DatasetDefinition.builder() + .database("my_db").name("my_stage").alias("my_alias") + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Only StagedFilesDataset are allowed under Bulk Load")); + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/data/staged_file1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/data/staged_file1.csv new file mode 100644 index 00000000000..dd2941bedb8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/data/staged_file1.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 2935578d08c..3d438dd721f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -223,9 +223,9 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } @Override - public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan sqlPlan, Map placeHolderKeyValues) + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { - List results = executor.executePhysicalPlanAndGetResults(sqlPlan, placeHolderKeyValues); + List results = executor.executePhysicalPlanAndGetResults(ingestSqlPlan, placeHolderKeyValues); List> resultSets = results.get(0).getData(); List dataFilePathsWithFailedBulkLoad = new ArrayList<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index ed9127b2e30..0fea1434e7d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -249,7 +249,7 @@ public void testBulkLoadDigestColumnNotProvided() .digestUdfName("LAKEHOUSE_UDF") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); - Assertions.fail("Should not happen"); + Assertions.fail("Exception was not thrown"); } catch (Exception e) { @@ -267,7 +267,7 @@ public void testBulkLoadDigestUDFNotProvided() .digestField("digest") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); - Assertions.fail("Should not happen"); + Assertions.fail("Exception was not thrown"); } catch (Exception e) { @@ -304,7 +304,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); - Assertions.fail("Should not happen"); + Assertions.fail("Exception was not thrown"); } catch (Exception e) { From 9753521553b369b57dab35cefa714a0ef9630f92 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 30 Aug 2023 13:00:14 +0800 Subject: [PATCH 23/57] Add test --- .../components/relational/h2/H2Sink.java | 29 ++++--- .../ingestmode/bulkload/BulkLoadTest.java | 80 +++++++++++++++++-- .../bulk-load/expected/expected_table1.csv | 3 + .../expected_table2.csv} | 0 .../data/bulk-load/input/staged_file1.csv | 3 + 
.../data/bulk-load/input/staged_file2.csv | 3 + 6 files changed, 99 insertions(+), 19 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/{data/staged_file1.csv => expected/expected_table2.csv} (100%) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file2.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 20026025ab9..d14e05f23ec 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -199,28 +199,33 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = new HashMap<>(); long incomingRecordCount = 0; long rowsInserted = 0; SqlPlan 
incomingRecordCountSqlPlan = statisticsSqlPlan.get(StatisticName.INCOMING_RECORD_COUNT); - SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); - - List incomingRecordCountResults = executor.executePhysicalPlanAndGetResults(incomingRecordCountSqlPlan, placeHolderKeyValues); - List rowsInsertedResults = executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues); - - for (Map row: incomingRecordCountResults.get(0).getData()) + if (incomingRecordCountSqlPlan != null) { - incomingRecordCount += (Long) row.get(StatisticName.INCOMING_RECORD_COUNT.get()); + List incomingRecordCountResults = executor.executePhysicalPlanAndGetResults(incomingRecordCountSqlPlan, placeHolderKeyValues); + for (Map row: incomingRecordCountResults.get(0).getData()) + { + incomingRecordCount += (Long) row.get(StatisticName.INCOMING_RECORD_COUNT.get()); + } + stats.put(StatisticName.INCOMING_RECORD_COUNT, incomingRecordCount); } - for (Map row: rowsInsertedResults.get(0).getData()) + + SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); + if (rowsInsertedSqlPlan != null) { - rowsInserted += (Long) row.get(StatisticName.ROWS_INSERTED.get()); + List rowsInsertedResults = executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues); + for (Map row: rowsInsertedResults.get(0).getData()) + { + rowsInserted += (Long) row.get(StatisticName.ROWS_INSERTED.get()); + } + stats.put(StatisticName.ROWS_INSERTED, rowsInserted); } IngestorResult result; - Map stats = new HashMap<>(); - stats.put(StatisticName.ROWS_INSERTED, rowsInserted); - stats.put(StatisticName.INCOMING_RECORD_COUNT, incomingRecordCount); stats.put(StatisticName.FILES_LOADED, 1); result = IngestorResult.builder() .status(IngestStatus.SUCCEEDED) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 6c1f864cb95..8b772a16975 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -19,6 +19,7 @@ import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -81,12 +82,11 @@ public class BulkLoadTest extends BaseTest protected final Clock fixedClock_2000_01_01 = Clock.fixed(fixedZonedDateTime_2000_01_01.toInstant(), ZoneOffset.UTC); @Test - public void testBulkLoadWithDigestNotGenerated() throws Exception + public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception { - String filePath = "src/test/resources/data/bulk-load/data/staged_file1.csv"; + String filePath = 
"src/test/resources/data/bulk-load/input/staged_file1.csv"; BulkLoad bulkLoad = BulkLoad.builder() - .digestField(DIGEST) .generateDigest(false) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -126,25 +126,91 @@ public void testBulkLoadWithDigestNotGenerated() throws Exception "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"append_time\") " + "SELECT " + "CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP),'2000-01-01 00:00:00' " + - "FROM CSVREAD('src/test/resources/data/bulk-load/data/staged_file1.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file1.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime}; + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, APPEND_TIME}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); - executePlansAndVerifyResults(bulkLoad, options, datasets, schema, filePath, expectedStats, 
fixedClock_2000_01_01); + String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table1.csv"; + + executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + } + + @Test + public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception + { + String filePath = "src/test/resources/data/bulk-load/input/staged_file2.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(false) + .auditing(NoAuditing.builder().build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .addAllFiles(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\") " + + "SELECT " + + 
"CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP) " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file2.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); + Assertions.assertNull(statsSql.get(ROWS_INSERTED)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime}; + + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + + String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table2.csv"; + + executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); } @Test diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv new file mode 100644 index 00000000000..69cb6d91ae4 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,2000-01-01 00:00:00.0 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/data/staged_file1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv similarity index 100% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/data/staged_file1.csv rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file1.csv new file mode 100644 index 00000000000..dd2941bedb8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file1.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file2.csv new file mode 100644 index 00000000000..dd2941bedb8 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file2.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file From 7cc25fd4878a38d42bcadb5333a6eb791d57ee36 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 31 Aug 2023 14:28:09 +0800 Subject: [PATCH 24/57] Add support for digest udf in h2 and add tests --- .../relational/sqldom/common/Clause.java | 3 +- .../relational/sqldom/utils/SqlGenUtils.java | 2 + .../components/relational/h2/H2Sink.java | 35 ++- .../values/ToArrayFunctionAbstract.java | 34 +++ .../h2/sql/visitor/DigestUdfVisitor.java | 51 ++++ .../sql/visitor/ToArrayFunctionVisitor.java | 67 +++++ .../schemaops/values/ToArrayFunction.java | 82 ++++++ .../h2/sqldom/schemaops/values/Udf.java | 46 ++++ .../ingestmode/bulkload/BulkLoadTest.java | 241 ++++++++++++++++++ .../bulk-load/expected/expected_table3.csv | 3 + .../bulk-load/expected/expected_table4.csv | 3 + .../bulk-load/expected/expected_table5.csv | 3 + .../data/bulk-load/input/staged_file3.csv | 3 + .../data/bulk-load/input/staged_file4.csv | 3 + .../data/bulk-load/input/staged_file5.csv | 3 + 15 files changed, 565 insertions(+), 14 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/Udf.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file3.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file4.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java index a69e1672d9c..1086d1e2c06 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java @@ -57,7 +57,8 @@ public enum Clause CLUSTER_BY("CLUSTER BY"), NOT_ENFORCED("NOT ENFORCED"), DATA_TYPE("DATA TYPE"), - CONVERT("CONVERT"); + CONVERT("CONVERT"), + ARRAY("ARRAY"); private final String clause; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/utils/SqlGenUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/utils/SqlGenUtils.java index d4716474462..0c1577e13aa 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/utils/SqlGenUtils.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/utils/SqlGenUtils.java @@ -21,6 +21,8 @@ public class SqlGenUtils public static final String COMMA = ","; public static final String OPEN_PARENTHESIS = "("; public static final String CLOSING_PARENTHESIS = ")"; + public static final String OPEN_SQUARE_BRACKET = "["; + public static final String CLOSING_SQUARE_BRACKET = "]"; public static final String EMPTY = ""; public static final String QUOTE_IDENTIFIER = "\"%s\""; public static final String SINGLE_QUOTE_IDENTIFIER = "'%s'"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index d14e05f23ec..bdfd48cf531 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -14,7 +14,6 @@ package org.finos.legend.engine.persistence.components.relational.h2; -import java.util.List; import java.util.Optional; import org.finos.legend.engine.persistence.components.common.Datasets; @@ -29,6 +28,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; 
import org.finos.legend.engine.persistence.components.logicalplan.operations.LoadCsv; +import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; import org.finos.legend.engine.persistence.components.logicalplan.values.HashFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; @@ -41,10 +41,12 @@ import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; +import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2JdbcPropertiesToLogicalDataTypeMapping; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.CopyVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.CsvExternalDatasetReferenceVisitor; +import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.DigestUdfVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.HashFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.LoadCsvVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.SchemaDefinitionVisitor; @@ -54,6 +56,7 @@ import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesDatasetVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesFieldValueVisitor; import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.StagedFilesSelectionVisitor; 
+import org.finos.legend.engine.persistence.components.relational.h2.sql.visitor.ToArrayFunctionVisitor; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.util.Capability; @@ -108,6 +111,8 @@ public class H2Sink extends AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesSelection.class, new StagedFilesSelectionVisitor()); logicalPlanVisitorByClass.put(StagedFilesDatasetReference.class, new StagedFilesDatasetReferenceVisitor()); logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); + logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); + logicalPlanVisitorByClass.put(ToArrayFunction.class, new ToArrayFunctionVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); Map> implicitDataTypeMapping = new HashMap<>(); @@ -200,28 +205,32 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = new HashMap<>(); - long incomingRecordCount = 0; - long rowsInserted = 0; SqlPlan incomingRecordCountSqlPlan = statisticsSqlPlan.get(StatisticName.INCOMING_RECORD_COUNT); if (incomingRecordCountSqlPlan != null) { - List incomingRecordCountResults = executor.executePhysicalPlanAndGetResults(incomingRecordCountSqlPlan, placeHolderKeyValues); - for (Map row: incomingRecordCountResults.get(0).getData()) - { - incomingRecordCount += (Long) row.get(StatisticName.INCOMING_RECORD_COUNT.get()); - } + long incomingRecordCount = (Long) executor.executePhysicalPlanAndGetResults(incomingRecordCountSqlPlan, placeHolderKeyValues) + .stream() + .findFirst() + .map(TabularData::getData) + .flatMap(t -> t.stream().findFirst()) + .map(Map::values) + .flatMap(t -> t.stream().findFirst()) + .orElseThrow(IllegalStateException::new); stats.put(StatisticName.INCOMING_RECORD_COUNT, incomingRecordCount); } 
SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); if (rowsInsertedSqlPlan != null) { - List rowsInsertedResults = executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues); - for (Map row: rowsInsertedResults.get(0).getData()) - { - rowsInserted += (Long) row.get(StatisticName.ROWS_INSERTED.get()); - } + long rowsInserted = (Long) executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues) + .stream() + .findFirst() + .map(TabularData::getData) + .flatMap(t -> t.stream().findFirst()) + .map(Map::values) + .flatMap(t -> t.stream().findFirst()) + .orElseThrow(IllegalStateException::new); stats.put(StatisticName.ROWS_INSERTED, rowsInserted); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java new file mode 100644 index 00000000000..ca7fd5e7997 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java @@ -0,0 +1,34 @@ +// Copyright 2022 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values; + +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.immutables.value.Value.Immutable; +import org.immutables.value.Value.Style; + +import java.util.List; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface ToArrayFunctionAbstract extends Value +{ + List value(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java new file mode 100644 index 00000000000..817943cf1d2 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java @@ -0,0 +1,51 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; +import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.Udf; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class DigestUdfVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, DigestUdf current, VisitorContext context) + { + Udf udf = new Udf(context.quoteIdentifier(), current.udfName()); + prev.push(udf); + List columnNameList = new ArrayList<>(); + List columnValueList = new ArrayList<>(); + for (int i = 0; i < current.values().size(); i++) + { + columnNameList.add(StringValue.of(current.fieldNames().get(i))); + columnValueList.add(current.values().get(i)); + } + + ToArrayFunction columnNames = ToArrayFunction.builder().addAllValue(columnNameList).build(); + ToArrayFunction columnValues = 
ToArrayFunction.builder().addAllValue(columnValueList).build(); + + return new VisitorResult(udf, Arrays.asList(columnNames, columnValues)); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java new file mode 100644 index 00000000000..26a58d4247b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java @@ -0,0 +1,67 @@ +// Copyright 2022 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.optimizer.Optimizer; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class ToArrayFunctionVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, ToArrayFunction current, VisitorContext context) + { + org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.ToArrayFunction function = + new org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.ToArrayFunction( + new ArrayList<>(), + current.alias().orElse(null), + context.quoteIdentifier() + ); + + for (Optimizer optimizer : context.optimizers()) + { + function = (org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.ToArrayFunction) optimizer.optimize(function); + } + prev.push(function); + + if (current.value() != null) + { + List logicalPlanNodeList = new ArrayList<>(); + for (Value value : current.value()) + { + if (value instanceof StagedFilesFieldValue) + { + logicalPlanNodeList.add(FieldValue.builder().fieldName(((StagedFilesFieldValue) value).fieldName()).build()); + } + else + { + logicalPlanNodeList.add(value); + } + 
} + return new VisitorResult(function, logicalPlanNodeList); + } + return new VisitorResult(null); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java new file mode 100644 index 00000000000..e83de043a16 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java @@ -0,0 +1,82 @@ +// Copyright 2022 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +import java.util.List; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_SQUARE_BRACKET; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.COMMA; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_SQUARE_BRACKET; + +public class ToArrayFunction extends Value +{ + protected List values; + + public ToArrayFunction(String quoteIdentifier) + { + super(quoteIdentifier); + } + + public ToArrayFunction(List values, String quoteIdentifier) + { + super(quoteIdentifier); + this.values = values; + } + + public ToArrayFunction(List values, String alias, String quoteIdentifier) + { + super(alias, quoteIdentifier); + this.values = values; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + builder.append(Clause.ARRAY.get()); + builder.append(OPEN_SQUARE_BRACKET); + if (values != null) + { + for (int ctr = 0; ctr < values.size(); ctr++) + { + values.get(ctr).genSqlWithoutAlias(builder); + if (ctr < (values.size() - 1)) + { + builder.append(COMMA); + } + } + } + builder.append(CLOSING_SQUARE_BRACKET); + } + + @Override + public void push(Object node) + { + if (node instanceof Value) + { + values.add((Value) node); + } + } +} \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/Udf.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/Udf.java new file mode 100644 index 00000000000..fc8b0e3dd18 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/Udf.java @@ -0,0 +1,46 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Function; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +import java.util.ArrayList; + +public class Udf extends Function +{ + private String udfName; + + public Udf(String quoteIdentifier, String udfName) + { + super(quoteIdentifier); + this.udfName = udfName; + super.values = new ArrayList<>(); + } + + @Override + public void push(Object node) + { + if (node instanceof Value) + { + values.add((Value) node); + } + } + + public String getFunctionName() + { + return udfName; + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 8b772a16975..8cc31ca3013 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -28,6 +28,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; import org.finos.legend.engine.persistence.components.planner.PlannerOptions; +import 
org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; @@ -55,6 +56,8 @@ public class BulkLoadTest extends BaseTest { private static final String APPEND_TIME = "append_time"; private static final String DIGEST = "digest"; + private static final String DIGEST_UDF = "LAKEHOUSE_MD5"; + private static final String LINEAGE = "lake_lineage"; private static final String col_int = "col_int"; private static final String col_string = "col_string"; private static final String col_decimal = "col_decimal"; @@ -213,6 +216,244 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); } + @Test + public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception + { + // Register UDF + h2Sink.executeStatement("CREATE ALIAS " + DIGEST_UDF + " AS '\n" + + "String concat(String[] values, String[] values2) {\n" + + " return String.join(\"-\", values) + String.join(\"-\", values2);\n" + + "}\n" + + "';"); + + String filePath = "src/test/resources/data/bulk-load/input/staged_file3.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(true) + .digestField(DIGEST) + .digestUdfName(DIGEST_UDF) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .addAllFiles(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + 
.database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"digest\" VARCHAR,\"append_time\" TIMESTAMP)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"digest\", \"append_time\") " + + "SELECT " + + "CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "LAKEHOUSE_MD5(ARRAY['col_int','col_string','col_decimal','col_datetime'],ARRAY[\"col_int\",\"col_string\",\"col_decimal\",\"col_datetime\"]),'2000-01-01 00:00:00' " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file3.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); + Assertions.assertEquals("SELECT COUNT(*) as 
\"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, DIGEST, APPEND_TIME}; + + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); + expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + + String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table3.csv"; + + executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + } + + @Test + public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws Exception + { + // Register UDF + h2Sink.executeStatement("CREATE ALIAS " + DIGEST_UDF + " AS '\n" + + "String concat(String[] values, String[] values2) {\n" + + " return String.join(\"-\", values) + String.join(\"-\", values2);\n" + + "}\n" + + "';"); + + String filePath = "src/test/resources/data/bulk-load/input/staged_file4.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(true) + .digestField(DIGEST) + .digestUdfName(DIGEST_UDF) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .lineageField(LINEAGE) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .addAllFiles(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + 
.build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"digest\" VARCHAR,\"append_time\" TIMESTAMP,\"lake_lineage\" VARCHAR)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"digest\", \"append_time\", \"lake_lineage\") " + + "SELECT " + + "CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "LAKEHOUSE_MD5(ARRAY['col_int','col_string','col_decimal','col_datetime'],ARRAY[\"col_int\",\"col_string\",\"col_decimal\",\"col_datetime\"])," + + "'2000-01-01 00:00:00'," + + "'src/test/resources/data/bulk-load/input/staged_file4.csv' " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file4.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); + Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM 
\"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, DIGEST, APPEND_TIME, LINEAGE}; + + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); + expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + + String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table4.csv"; + + executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + } + + @Test + public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() throws Exception + { + // Register UDF + h2Sink.executeStatement("CREATE ALIAS " + DIGEST_UDF + " AS '\n" + + "String concat(String[] values, String[] values2) {\n" + + " return String.join(\"-\", values) + String.join(\"-\", values2);\n" + + "}\n" + + "';"); + + String filePath = "src/test/resources/data/bulk-load/input/staged_file5.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(true) + .digestField(DIGEST) + .digestUdfName(DIGEST_UDF) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .lineageField(LINEAGE) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .addAllFiles(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); 
+ + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + // Verify SQLs using generator + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .caseConversion(CaseConversion.TO_UPPER) + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"MAIN\"" + + "(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" DECIMAL(5,2),\"COL_DATETIME\" TIMESTAMP,\"DIGEST\" VARCHAR,\"APPEND_TIME\" TIMESTAMP,\"LAKE_LINEAGE\" VARCHAR)"; + + String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN\" " + + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"DIGEST\", \"APPEND_TIME\", \"LAKE_LINEAGE\") " + + "SELECT " + + "CONVERT(\"COL_INT\",INTEGER),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",DECIMAL(5,2)),CONVERT(\"COL_DATETIME\",TIMESTAMP)," + + "LAKEHOUSE_MD5(ARRAY['COL_INT','COL_STRING','COL_DECIMAL','COL_DATETIME'],ARRAY[\"COL_INT\",\"COL_STRING\",\"COL_DECIMAL\",\"COL_DATETIME\"])," + + "'2000-01-01 00:00:00'," + + "'src/test/resources/data/bulk-load/input/staged_file5.csv' " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file5.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals("SELECT COUNT(*) as \"INCOMINGRECORDCOUNT\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); + Assertions.assertEquals("SELECT COUNT(*) as 
\"ROWSINSERTED\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); + + + // Verify execution using ingestor + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = new String[]{col_int.toUpperCase(), col_string.toUpperCase(), col_decimal.toUpperCase(), col_datetime.toUpperCase(), DIGEST.toUpperCase(), APPEND_TIME.toUpperCase(), LINEAGE.toUpperCase()}; + + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); + expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); + expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + + String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table5.csv"; + + executePlansAndVerifyForCaseConversion(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + } + @Test public void testBulkLoadDigestColumnNotProvided() { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv new file mode 100644 index 00000000000..2d9ecb9732b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0,col_int-col_string-col_decimal-col_datetime1-Andy-5.20-2022-01-11 00:00:00.0,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,col_int-col_string-col_decimal-col_datetime2-Bella-99.99-2022-01-12 00:00:00.0,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 
00:00:00.0,col_int-col_string-col_decimal-col_datetime49-Sandy-123.45-2022-01-13 00:00:00.0,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv new file mode 100644 index 00000000000..a99b537a9ae --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0,col_int-col_string-col_decimal-col_datetime1-Andy-5.20-2022-01-11 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv +2,Bella,99.99,2022-01-12 00:00:00.0,col_int-col_string-col_decimal-col_datetime2-Bella-99.99-2022-01-12 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv +49,Sandy,123.45,2022-01-13 00:00:00.0,col_int-col_string-col_decimal-col_datetime49-Sandy-123.45-2022-01-13 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv new file mode 100644 index 00000000000..73221c1e123 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0,COL_INT-COL_STRING-COL_DECIMAL-COL_DATETIME1-Andy-5.20-2022-01-11 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv +2,Bella,99.99,2022-01-12 00:00:00.0,COL_INT-COL_STRING-COL_DECIMAL-COL_DATETIME2-Bella-99.99-2022-01-12 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv +49,Sandy,123.45,2022-01-13 00:00:00.0,COL_INT-COL_STRING-COL_DECIMAL-COL_DATETIME49-Sandy-123.45-2022-01-13 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file3.csv new file mode 100644 index 00000000000..dd2941bedb8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file3.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file4.csv new file mode 100644 index 
00000000000..dd2941bedb8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file4.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv new file mode 100644 index 00000000000..dd2941bedb8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file From a3c7f7444cd059ea5dcd4f9f67dd5a10a7702d9f Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 31 Aug 2023 14:39:47 +0800 Subject: [PATCH 25/57] Clean up --- .../h2/logicalplan/values/ToArrayFunctionAbstract.java | 2 +- .../relational/h2/sql/visitor/DigestUdfVisitor.java | 4 ++-- .../h2/sql/visitor/StagedFilesFieldValueVisitor.java | 8 +------- .../relational/h2/sql/visitor/ToArrayFunctionVisitor.java | 4 ++-- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java index ca7fd5e7997..455b9f6a415 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java @@ -30,5 +30,5 @@ ) public interface ToArrayFunctionAbstract extends Value { - List value(); + List values(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java index 817943cf1d2..00092582088 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java @@ -43,8 +43,8 @@ public VisitorResult visit(PhysicalPlanNode prev, DigestUdf current, VisitorCont 
columnValueList.add(current.values().get(i)); } - ToArrayFunction columnNames = ToArrayFunction.builder().addAllValue(columnNameList).build(); - ToArrayFunction columnValues = ToArrayFunction.builder().addAllValue(columnValueList).build(); + ToArrayFunction columnNames = ToArrayFunction.builder().addAllValues(columnNameList).build(); + ToArrayFunction columnValues = ToArrayFunction.builder().addAllValues(columnValueList).build(); return new VisitorResult(udf, Arrays.asList(columnNames, columnValues)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java index ad8fbd0cce8..92dc1d96dfe 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesFieldValueVisitor.java @@ -18,7 +18,6 @@ import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.h2.sql.H2DataTypeMapping; import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.StagedFilesField; -import org.finos.legend.engine.persistence.components.relational.sql.DataTypeMapping; import 
org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; @@ -29,14 +28,9 @@ public class StagedFilesFieldValueVisitor implements LogicalPlanVisitor logicalPlanNodeList = new ArrayList<>(); - for (Value value : current.value()) + for (Value value : current.values()) { if (value instanceof StagedFilesFieldValue) { From a1bb51adb8639bb0c514e3937f9a8c7f9d1f2ffd Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 31 Aug 2023 17:24:20 +0800 Subject: [PATCH 26/57] Add file format and validation for file format and add tests --- .../components/common/FileFormat.java | 23 ++++++++++ ...2StagedFilesDatasetPropertiesAbstract.java | 7 +++ .../ingestmode/bulkload/BulkLoadTest.java | 44 +++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java new file mode 100644 index 00000000000..c73082518ed --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java @@ -0,0 +1,23 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.common; + +public enum FileFormat +{ + CSV, + JSON, + AVRO, + PARQUET +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java index c3651179add..1e5a2ac8ac0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets; +import org.finos.legend.engine.persistence.components.common.FileFormat; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; @@ -29,6 +30,8 @@ ) public interface H2StagedFilesDatasetPropertiesAbstract extends StagedFilesDatasetProperties { + FileFormat fileFormat(); + @Value.Check default void validate() { @@ -36,5 +39,9 @@ default void validate() { throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only 1 file per load supported"); } + if (!fileFormat().equals(FileFormat.CSV)) + { + throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only CSV file loading supported"); + } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 8cc31ca3013..fea8c84465f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.BaseTest; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormat; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import 
org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; @@ -97,6 +98,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -164,6 +166,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -238,6 +241,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -316,6 +320,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws E Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -396,6 +401,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( 
H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -526,4 +532,42 @@ public void testBulkLoadStagedFilesDatasetNotProvided() Assertions.assertTrue(e.getMessage().contains("Only StagedFilesDataset are allowed under Bulk Load")); } } + + @Test + public void testBulkLoadMoreThanOneFile() + { + try + { + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) + .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.csv", "src/test/resources/data/bulk-load/input/staged_file2.csv")).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Cannot build H2StagedFilesDatasetProperties, only 1 file per load supported")); + } + } + + @Test + public void testBulkLoadNotCsvFile() + { + try + { + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.JSON) + .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.json")).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Cannot build H2StagedFilesDatasetProperties, only CSV file loading supported")); + } + } } From edeef497a950c2e2919591f7d1cce3371a1620e8 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 31 Aug 2023 17:27:12 +0800 Subject: [PATCH 27/57] Add missing statement --- .../components/ingestmode/bulkload/BulkLoadTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index fea8c84465f..f788c382705 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -545,6 +545,7 @@ public void testBulkLoadMoreThanOneFile() .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.csv", "src/test/resources/data/bulk-load/input/staged_file2.csv")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); + Assertions.fail("Exception was not thrown"); } catch (Exception e) { @@ -564,6 +565,7 @@ public void testBulkLoadNotCsvFile() .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.json")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); + Assertions.fail("Exception was not thrown"); } catch (Exception e) { From b067e6efdf1c79a479f86fe1b81f1f0f3f533226 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 4 Sep 2023 12:48:28 +0800 Subject: [PATCH 28/57] Fix typo in year --- .../logicalplan/datasets/StagedFilesSelectionAbstract.java | 2 +- .../h2/logicalplan/values/ToArrayFunctionAbstract.java | 2 +- 
.../relational/h2/sql/visitor/StagedFilesSelectionVisitor.java | 2 +- .../relational/h2/sql/visitor/ToArrayFunctionVisitor.java | 2 +- .../relational/h2/sqldom/schemaops/values/ToArrayFunction.java | 2 +- .../snowflake/sql/visitor/StagedFilesSelectionVisitor.java | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java index 60efdfb69f7..2ade008e70c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/StagedFilesSelectionAbstract.java @@ -1,4 +1,4 @@ -// Copyright 2022 Goldman Sachs +// Copyright 2023 Goldman Sachs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java index 455b9f6a415..8e96e061c24 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/values/ToArrayFunctionAbstract.java @@ -1,4 +1,4 @@ -// Copyright 2022 Goldman Sachs +// Copyright 2023 Goldman Sachs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java index d8c789e9bfc..440691942e5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/StagedFilesSelectionVisitor.java @@ -1,4 +1,4 @@ -// Copyright 2022 Goldman Sachs +// Copyright 2023 Goldman Sachs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java index 2bc9455a535..1b993221bc6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/ToArrayFunctionVisitor.java @@ -1,4 +1,4 @@ -// Copyright 2022 Goldman Sachs +// Copyright 2023 Goldman Sachs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java index e83de043a16..f98987943ef 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/ToArrayFunction.java @@ -1,4 +1,4 @@ -// Copyright 2022 Goldman Sachs +// Copyright 2023 Goldman Sachs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesSelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesSelectionVisitor.java index 88de0dc64b1..ff8768dc4e1 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesSelectionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/StagedFilesSelectionVisitor.java @@ -1,4 +1,4 @@ -// Copyright 2022 Goldman Sachs +// Copyright 2023 Goldman Sachs // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
From 528405b4cfad01b6eef23b79364bf96ab19390b6 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 4 Sep 2023 13:02:54 +0800 Subject: [PATCH 29/57] Fix comments --- .../sqldom/schemaops/values/Udf.java | 5 +- .../components/relational/h2/H2Sink.java | 32 ++++++------- .../h2/sql/visitor/DigestUdfVisitor.java | 2 +- .../schemaops/statements/CopyStatement.java | 2 +- .../sql/visitor/DigestUdfVisitor.java | 2 +- .../sqldom/schemaops/values/Udf.java | 46 ------------------- 6 files changed, 18 insertions(+), 71 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/{legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2 => legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational}/sqldom/schemaops/values/Udf.java (83%) delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/Udf.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/Udf.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/Udf.java similarity index 83% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/Udf.java rename to 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/Udf.java index fc8b0e3dd18..73189999c62 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/values/Udf.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/schemaops/values/Udf.java @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values; - -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Function; -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; +package org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values; import java.util.ArrayList; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index bdfd48cf531..4e8c992ae19 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -209,29 +209,13 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor t.stream().findFirst()) - .map(Map::values) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new); - stats.put(StatisticName.INCOMING_RECORD_COUNT, incomingRecordCount); + stats.put(StatisticName.INCOMING_RECORD_COUNT, getStats(executor, incomingRecordCountSqlPlan, placeHolderKeyValues)); } SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); if (rowsInsertedSqlPlan != null) { - long rowsInserted = (Long) executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues) - .stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .map(Map::values) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new); - stats.put(StatisticName.ROWS_INSERTED, rowsInserted); + stats.put(StatisticName.ROWS_INSERTED, getStats(executor, rowsInsertedSqlPlan, placeHolderKeyValues)); } IngestorResult result; @@ -245,4 +229,16 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan sqlPlan, Map placeHolderKeyValues) + { + return (Long) executor.executePhysicalPlanAndGetResults(sqlPlan, placeHolderKeyValues) + .stream() + .findFirst() + .map(TabularData::getData) + .flatMap(t -> t.stream().findFirst()) + .map(Map::values) + .flatMap(t -> t.stream().findFirst()) + .orElseThrow(IllegalStateException::new); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java index 00092582088..f40354b288d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sql/visitor/DigestUdfVisitor.java @@ -19,7 +19,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.values.ToArrayFunction; -import org.finos.legend.engine.persistence.components.relational.h2.sqldom.schemaops.values.Udf; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Udf; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java index 9849a55e18a..044369e80f2 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/sqldom/schemaops/statements/CopyStatement.java @@ -51,7 +51,7 @@ public CopyStatement(Table table, List columns, SelectStatement selectSta Copy GENERIC PLAN for H2: INSERT INTO table_name (COLUMN_LIST) SELECT [CONVERT("column_name", column_type) , ...] - FROM CSVREAD('{FILE_PATH}','{CSV_COLUMN_NAMES}',NULL) + FROM CSVREAD('{FILE_PATH}','{CSV_COLUMN_NAMES}','{CSV_OPTIONS}') */ @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DigestUdfVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DigestUdfVisitor.java index 85ff159f45e..d0c92e06f30 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DigestUdfVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sql/visitor/DigestUdfVisitor.java @@ -20,7 +20,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import 
org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; -import org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values.Udf; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Udf; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/Udf.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/Udf.java deleted file mode 100644 index 12ff50949db..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/sqldom/schemaops/values/Udf.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package org.finos.legend.engine.persistence.components.relational.snowflake.sqldom.schemaops.values; - -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Function; -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; - -import java.util.ArrayList; - -public class Udf extends Function -{ - private String udfName; - - public Udf(String quoteIdentifier, String udfName) - { - super(quoteIdentifier); - this.udfName = udfName; - super.values = new ArrayList<>(); - } - - @Override - public void push(Object node) - { - if (node instanceof Value) - { - values.add((Value) node); - } - } - - public String getFunctionName() - { - return udfName; - } -} From db7a7dd51abbbf4076578051701b99d410c7c394 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 4 Sep 2023 15:36:38 +0800 Subject: [PATCH 30/57] Add H2 MD5 --- .../pom.xml | 6 ++++ .../relational/h2/H2DigestUtil.java | 35 +++++++++++++++++++ .../ingestmode/bulkload/BulkLoadTest.java | 19 +++------- .../bulk-load/expected/expected_table3.csv | 6 ++-- .../bulk-load/expected/expected_table4.csv | 6 ++-- .../bulk-load/expected/expected_table5.csv | 6 ++-- 6 files changed, 54 insertions(+), 24 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml index 59e37d4640f..c8ec93bf632 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/pom.xml @@ -59,6 +59,12 @@ runtime + + + commons-codec + commons-codec + + org.junit.jupiter diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java new file mode 100644 index 00000000000..82a7f3788f7 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java @@ -0,0 +1,35 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.h2; + +import org.apache.commons.codec.digest.DigestUtils; +import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper; + + +public class H2DigestUtil +{ + public static void registerMD5Udf(JdbcHelper sink, String UdfName) + { + sink.executeStatement("CREATE ALIAS " + UdfName + " FOR \"org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil.MD5\";"); + } + + public static String MD5(String[] columnNameList, String[] columnValueList) + { + String columnNames = String.join("", columnNameList); + String columnValues = String.join("", columnValueList); + String columnNamesAndColumnValues = columnNames + columnValues; + return DigestUtils.md5Hex(columnNamesAndColumnValues).toUpperCase(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index f788c382705..507293c0113 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -32,6 +32,7 @@ import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import 
org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; +import org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; import org.junit.jupiter.api.Assertions; @@ -223,11 +224,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception { // Register UDF - h2Sink.executeStatement("CREATE ALIAS " + DIGEST_UDF + " AS '\n" + - "String concat(String[] values, String[] values2) {\n" + - " return String.join(\"-\", values) + String.join(\"-\", values2);\n" + - "}\n" + - "';"); + H2DigestUtil.registerMD5Udf(h2Sink, DIGEST_UDF); String filePath = "src/test/resources/data/bulk-load/input/staged_file3.csv"; @@ -301,11 +298,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws Exception { // Register UDF - h2Sink.executeStatement("CREATE ALIAS " + DIGEST_UDF + " AS '\n" + - "String concat(String[] values, String[] values2) {\n" + - " return String.join(\"-\", values) + String.join(\"-\", values2);\n" + - "}\n" + - "';"); + H2DigestUtil.registerMD5Udf(h2Sink, DIGEST_UDF); String filePath = "src/test/resources/data/bulk-load/input/staged_file4.csv"; @@ -382,11 +375,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws E public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() throws Exception { // Register UDF - h2Sink.executeStatement("CREATE ALIAS " + DIGEST_UDF + " AS '\n" + - "String concat(String[] values, String[] values2) {\n" + - " return String.join(\"-\", values) + String.join(\"-\", values2);\n" + - "}\n" + - "';"); + H2DigestUtil.registerMD5Udf(h2Sink, DIGEST_UDF); String 
filePath = "src/test/resources/data/bulk-load/input/staged_file5.csv"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv index 2d9ecb9732b..fb82c08158b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,col_int-col_string-col_decimal-col_datetime1-Andy-5.20-2022-01-11 00:00:00.0,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,col_int-col_string-col_decimal-col_datetime2-Bella-99.99-2022-01-12 00:00:00.0,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0,col_int-col_string-col_decimal-col_datetime49-Sandy-123.45-2022-01-13 00:00:00.0,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,6366D6AFD9E8B991393E719A5A4E6D35,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,C556B5DC2B9F3A66000202DF9D98EC05,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,051D68CF86951CDE0DF875915940AEC6,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv index a99b537a9ae..395da23b52f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,col_int-col_string-col_decimal-col_datetime1-Andy-5.20-2022-01-11 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv -2,Bella,99.99,2022-01-12 00:00:00.0,col_int-col_string-col_decimal-col_datetime2-Bella-99.99-2022-01-12 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv -49,Sandy,123.45,2022-01-13 00:00:00.0,col_int-col_string-col_decimal-col_datetime49-Sandy-123.45-2022-01-13 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,6366D6AFD9E8B991393E719A5A4E6D35,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv +2,Bella,99.99,2022-01-12 00:00:00.0,C556B5DC2B9F3A66000202DF9D98EC05,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv +49,Sandy,123.45,2022-01-13 00:00:00.0,051D68CF86951CDE0DF875915940AEC6,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv index 73221c1e123..7d90d71c952 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,COL_INT-COL_STRING-COL_DECIMAL-COL_DATETIME1-Andy-5.20-2022-01-11 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv -2,Bella,99.99,2022-01-12 00:00:00.0,COL_INT-COL_STRING-COL_DECIMAL-COL_DATETIME2-Bella-99.99-2022-01-12 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv -49,Sandy,123.45,2022-01-13 00:00:00.0,COL_INT-COL_STRING-COL_DECIMAL-COL_DATETIME49-Sandy-123.45-2022-01-13 00:00:00.0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,4B39799C7A1FB5EFC4BC328966A159E0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv +2,Bella,99.99,2022-01-12 00:00:00.0,58467B440BCED7607369DC8A260B0607,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv +49,Sandy,123.45,2022-01-13 00:00:00.0,29B8C8A6CD28B069290372E6B54B6C72,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv \ No newline at end of file From 42ff733ecae78867f5cb99a71c58feb7b027e7f5 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 5 Sep 2023 15:03:06 +0800 Subject: [PATCH 31/57] Change file format interface --- .../common/AvroFileFormatAbstract.java | 29 +++++++++++++++++++ .../common/CsvFileFormatAbstract.java | 29 
+++++++++++++++++++ .../components/common/FileFormat.java | 6 +--- .../common/JsonFileFormatAbstract.java | 29 +++++++++++++++++++ .../common/ParquetFileFormatAbstract.java | 29 +++++++++++++++++++ ...2StagedFilesDatasetPropertiesAbstract.java | 3 +- .../ingestmode/bulkload/BulkLoadTest.java | 17 ++++++----- 7 files changed, 128 insertions(+), 14 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java new file mode 100644 index 00000000000..7ba38d2c0b5 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java @@ -0,0 +1,29 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.common; + +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface AvroFileFormatAbstract extends FileFormat +{ +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java new file mode 100644 index 00000000000..fb7aae5d5ca --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java @@ -0,0 +1,29 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, 
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.common; + +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface CsvFileFormatAbstract extends FileFormat +{ +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java index c73082518ed..90085b358c4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java @@ -14,10 +14,6 @@ package org.finos.legend.engine.persistence.components.common; -public enum FileFormat +public interface FileFormat { - CSV, - JSON, - AVRO, - PARQUET } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java new file mode 100644 index 00000000000..40d78c1730d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java @@ -0,0 +1,29 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.common; + +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface JsonFileFormatAbstract extends FileFormat +{ +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java new file mode 100644 index 00000000000..d03d72a2306 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java @@ -0,0 +1,29 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.common; + +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface ParquetFileFormatAbstract extends FileFormat +{ +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java index 1e5a2ac8ac0..098b454df28 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets; +import org.finos.legend.engine.persistence.components.common.CsvFileFormat; import org.finos.legend.engine.persistence.components.common.FileFormat; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; @@ -39,7 +40,7 @@ default void validate() { throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only 1 file per load 
supported"); } - if (!fileFormat().equals(FileFormat.CSV)) + if (!(fileFormat() instanceof CsvFileFormat)) { throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only CSV file loading supported"); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 507293c0113..bcb3595de6d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -15,8 +15,9 @@ package org.finos.legend.engine.persistence.components.ingestmode.bulkload; import org.finos.legend.engine.persistence.components.BaseTest; +import org.finos.legend.engine.persistence.components.common.CsvFileFormat; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.JsonFileFormat; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; @@ -99,7 +100,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception Dataset stagedFilesDataset = 
StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) + .fileFormat(CsvFileFormat.builder().build()) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -167,7 +168,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) + .fileFormat(CsvFileFormat.builder().build()) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -238,7 +239,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) + .fileFormat(CsvFileFormat.builder().build()) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -313,7 +314,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws E Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) + .fileFormat(CsvFileFormat.builder().build()) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -390,7 +391,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - 
.fileFormat(FileFormat.CSV) + .fileFormat(CsvFileFormat.builder().build()) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -530,7 +531,7 @@ public void testBulkLoadMoreThanOneFile() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) + .fileFormat(CsvFileFormat.builder().build()) .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.csv", "src/test/resources/data/bulk-load/input/staged_file2.csv")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -550,7 +551,7 @@ public void testBulkLoadNotCsvFile() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.JSON) + .fileFormat(JsonFileFormat.builder().build()) .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.json")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); From a8726a94521957fce6835fd1efd78caa8313bed0 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 7 Sep 2023 17:18:08 +0800 Subject: [PATCH 32/57] Change stats --- .../components/planner/BulkLoadPlanner.java | 11 +------ .../components/relational/h2/H2Sink.java | 30 +++++++------------ .../ingestmode/bulkload/BulkLoadTest.java | 15 ++++------ .../components/ingestmode/BulkLoadTest.java | 3 -- 4 files changed, 16 insertions(+), 43 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 2780ef791a4..4fffec17bcc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -42,7 +42,6 @@ import java.util.*; import java.util.stream.Collectors; -import static org.finos.legend.engine.persistence.components.common.StatisticName.INCOMING_RECORD_COUNT; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; class BulkLoadPlanner extends Planner @@ -145,15 +144,7 @@ public void addPostRunStatsForRowsInserted(Map postR @Override protected void addPostRunStatsForIncomingRecords(Map postRunStatisticsResult) { - // Only supported if Audit enabled - if (ingestMode().auditing().accept(AUDIT_ENABLED)) - { - // Rows inserted = rows in main with audit column equals latest timestamp - String auditField = ingestMode().auditing().accept(AuditingVisitors.EXTRACT_AUDIT_FIELD).orElseThrow(IllegalStateException::new); - postRunStatisticsResult.put(INCOMING_RECORD_COUNT, LogicalPlan.builder() - .addOps(getRowsBasedOnAppendTimestamp(mainDataset(), auditField, INCOMING_RECORD_COUNT.get())) - .build()); - } + // Not supported at the moment } private Selection getRowsBasedOnAppendTimestamp(Dataset dataset, String field, String alias) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 4e8c992ae19..9f93c193c6f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -205,21 +205,23 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = new HashMap<>(); - - SqlPlan incomingRecordCountSqlPlan = statisticsSqlPlan.get(StatisticName.INCOMING_RECORD_COUNT); - if (incomingRecordCountSqlPlan != null) - { - stats.put(StatisticName.INCOMING_RECORD_COUNT, getStats(executor, incomingRecordCountSqlPlan, placeHolderKeyValues)); - } + stats.put(StatisticName.FILES_LOADED, 1); + stats.put(StatisticName.ROWS_WITH_ERRORS, 0); SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); if (rowsInsertedSqlPlan != null) { - stats.put(StatisticName.ROWS_INSERTED, getStats(executor, rowsInsertedSqlPlan, placeHolderKeyValues)); + stats.put(StatisticName.ROWS_INSERTED, executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues) + .stream() + .findFirst() + .map(TabularData::getData) + .flatMap(t -> t.stream().findFirst()) + .map(Map::values) + .flatMap(t -> t.stream().findFirst()) + .orElseThrow(IllegalStateException::new)); } IngestorResult result; - stats.put(StatisticName.FILES_LOADED, 1); result = 
IngestorResult.builder() .status(IngestStatus.SUCCEEDED) .updatedDatasets(datasets) @@ -229,16 +231,4 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan sqlPlan, Map placeHolderKeyValues) - { - return (Long) executor.executePhysicalPlanAndGetResults(sqlPlan, placeHolderKeyValues) - .stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .map(Map::values) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new); - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 507293c0113..bf3a91e7ed7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -51,7 +51,6 @@ import static org.finos.legend.engine.persistence.components.TestUtils.mainTableName; import static org.finos.legend.engine.persistence.components.TestUtils.testDatabaseName; import static org.finos.legend.engine.persistence.components.TestUtils.testSchemaName; -import static org.finos.legend.engine.persistence.components.common.StatisticName.INCOMING_RECORD_COUNT; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; public class BulkLoadTest extends BaseTest 
@@ -136,7 +135,6 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); @@ -145,9 +143,9 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, APPEND_TIME}; Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table1.csv"; @@ -204,7 +202,6 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertNull(statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertNull(statsSql.get(ROWS_INSERTED)); @@ -214,6 +211,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table2.csv"; @@ -276,7 +274,6 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception 
Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); @@ -285,9 +282,9 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, DIGEST, APPEND_TIME}; Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table3.csv"; @@ -353,7 +350,6 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws E Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); @@ -362,9 +358,9 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws E String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, DIGEST, APPEND_TIME, LINEAGE}; Map expectedStats = new HashMap<>(); 
- expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table4.csv"; @@ -431,7 +427,6 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"INCOMINGRECORDCOUNT\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"TEST_DB\".\"TEST\".\"MAIN\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); @@ -440,9 +435,9 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() String[] schema = new String[]{col_int.toUpperCase(), col_string.toUpperCase(), col_decimal.toUpperCase(), col_datetime.toUpperCase(), DIGEST.toUpperCase(), APPEND_TIME.toUpperCase(), LINEAGE.toUpperCase()}; Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.INCOMING_RECORD_COUNT.name(), 3); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table5.csv"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 0fea1434e7d..ae5ee8aa6af 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -122,7 +122,6 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as \"rowsUpdated\"", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); } @@ -235,7 +234,6 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() Assertions.assertEquals("SELECT 0 as \"ROWSDELETED\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"ROWSTERMINATED\"", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as \"ROWSUPDATED\"", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as \"INCOMINGRECORDCOUNT\" FROM \"MY_DB\".\"MY_NAME\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); 
Assertions.assertEquals("SELECT COUNT(*) as \"ROWSINSERTED\" FROM \"MY_DB\".\"MY_NAME\" as my_alias WHERE my_alias.\"APPEND_TIME\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); } @@ -368,7 +366,6 @@ public void testBulkLoadWithDigestAndLineage() Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); Assertions.assertEquals("SELECT 0 as \"rowsUpdated\"", statsSql.get(ROWS_UPDATED)); - Assertions.assertEquals("SELECT COUNT(*) as \"incomingRecordCount\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(INCOMING_RECORD_COUNT)); Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"my_db\".\"my_name\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); } } From f36e6bf366a7386784168fc3ec81b692b8c11c1a Mon Sep 17 00:00:00 2001 From: kumuwu Date: Mon, 11 Sep 2023 15:49:50 +0800 Subject: [PATCH 33/57] Change stats - make snowflake always return stats no matter success or failure --- .../relational/snowflake/SnowflakeSink.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 3d438dd721f..b701708aa45 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -254,13 +254,15 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = new HashMap<>(); + stats.put(StatisticName.ROWS_INSERTED, totalRowsLoaded); + stats.put(StatisticName.ROWS_WITH_ERRORS, totalRowsWithError); + stats.put(StatisticName.FILES_LOADED, totalFilesLoaded); + if (dataFilePathsWithFailedBulkLoad.isEmpty()) { - Map stats = new HashMap<>(); - stats.put(StatisticName.ROWS_INSERTED, totalRowsLoaded); - stats.put(StatisticName.ROWS_WITH_ERRORS, totalRowsWithError); - stats.put(StatisticName.FILES_LOADED, totalFilesLoaded); result = IngestorResult.builder() .status(IngestStatus.SUCCEEDED) .updatedDatasets(datasets) @@ -275,6 +277,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor Date: Tue, 12 Sep 2023 15:10:23 +0800 Subject: [PATCH 34/57] Implement bulk load in using copy and insert + modify interface for capabilities --- .../components/planner/AppendOnlyPlanner.java | 6 +- .../planner/BitemporalDeltaPlanner.java | 6 +- .../components/planner/BitemporalPlanner.java | 6 +- .../planner/BitemporalSnapshotPlanner.java | 6 +- .../components/planner/BulkLoadPlanner.java | 144 +++++++++++++++--- .../planner/NontemporalDeltaPlanner.java | 6 +- .../planner/NontemporalSnapshotPlanner.java | 12 +- .../components/planner/Planner.java | 6 +- .../components/planner/Planners.java | 31 ++-- .../planner/UnitemporalDeltaPlanner.java | 6 +- .../planner/UnitemporalPlanner.java | 7 +- .../planner/UnitemporalSnapshotPlanner.java | 6 +- .../components/util/Capability.java | 1 + .../api/RelationalGeneratorAbstract.java | 6 +- .../api/RelationalIngestorAbstract.java | 6 +- .../components/relational/h2/H2Sink.java | 1 + .../relational/snowflake/SnowflakeSink.java | 1 + .../NontemporalSnapshotTestCases.java | 2 
+- 18 files changed, 187 insertions(+), 72 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java index 21a21628db3..0476ee62baf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/AppendOnlyPlanner.java @@ -60,9 +60,9 @@ class AppendOnlyPlanner extends Planner { private final Optional dataSplitInRangeCondition; - AppendOnlyPlanner(Datasets datasets, AppendOnly ingestMode, PlannerOptions plannerOptions) + AppendOnlyPlanner(Datasets datasets, AppendOnly ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); // validate ingestMode.deduplicationStrategy().accept(new ValidatePrimaryKeys(primaryKeys, this::validatePrimaryKeysIsEmpty, @@ -83,7 +83,7 @@ protected AppendOnly ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) { List fieldsToSelect = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java index d3b06d47a4c..93a97abfec8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalDeltaPlanner.java @@ -105,9 +105,9 @@ class BitemporalDeltaPlanner extends BitemporalPlanner private List primaryKeyFieldsAndFromFieldForSelection; private List dataFields; - BitemporalDeltaPlanner(Datasets datasets, BitemporalDelta ingestMode, PlannerOptions plannerOptions) + BitemporalDeltaPlanner(Datasets datasets, BitemporalDelta ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); if (ingestMode().validityMilestoning().validityDerivation() instanceof SourceSpecifiesFromDateTime && ingestMode().deduplicationStrategy() instanceof FilterDuplicates) { @@ -201,7 +201,7 @@ protected BitemporalDelta ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) { List operations = new ArrayList<>(); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalPlanner.java index baff665cf4b..61745fad675 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalPlanner.java @@ -25,17 +25,19 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import java.util.ArrayList; import java.util.List; import java.util.Optional; +import java.util.Set; abstract class BitemporalPlanner extends UnitemporalPlanner { - BitemporalPlanner(Datasets datasets, BitemporalMilestoned bitemporalMilestoned, PlannerOptions plannerOptions) + BitemporalPlanner(Datasets datasets, BitemporalMilestoned bitemporalMilestoned, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, bitemporalMilestoned, plannerOptions); + super(datasets, bitemporalMilestoned, plannerOptions, capabilities); // validate String targetValidDateTimeFrom = bitemporalMilestoned.validityMilestoning().accept(EXTRACT_TARGET_VALID_DATE_TIME_FROM); diff 
--git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java index 854d0cceeb4..1d52c6bdeea 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BitemporalSnapshotPlanner.java @@ -45,9 +45,9 @@ class BitemporalSnapshotPlanner extends BitemporalPlanner { - BitemporalSnapshotPlanner(Datasets datasets, BitemporalSnapshot ingestMode, PlannerOptions plannerOptions) + BitemporalSnapshotPlanner(Datasets datasets, BitemporalSnapshot ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); // validate @@ -67,7 +67,7 @@ protected BitemporalSnapshot ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) { List> keyValuePairs = keyValuesForMilestoningUpdate(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 4fffec17bcc..629aabdc34b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -21,7 +21,11 @@ import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.logicalplan.values.All; @@ -43,15 +47,19 @@ import java.util.stream.Collectors; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; +import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.TEMP_DATASET_BASE_NAME; +import static 
org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.UNDERSCORE; class BulkLoadPlanner extends Planner { + private boolean allowExtraFieldsWhileCopying; + private Dataset tempDataset; private StagedFilesDataset stagedFilesDataset; - BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions) + BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); // validation if (!(datasets.stagingDataset() instanceof StagedFilesDataset)) @@ -60,6 +68,18 @@ class BulkLoadPlanner extends Planner } stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); + + allowExtraFieldsWhileCopying = capabilities.contains(Capability.ALLOW_EXTRA_FIELDS_WHILE_COPYING); + if (!allowExtraFieldsWhileCopying) + { + tempDataset = DatasetDefinition.builder() + .schema(datasets.stagingDataset().schema()) + .database(datasets.mainDataset().datasetReference().database()) + .group(datasets.mainDataset().datasetReference().group()) + .name(datasets.mainDataset().datasetReference().name() + UNDERSCORE + TEMP_DATASET_BASE_NAME) + .alias(TEMP_DATASET_BASE_NAME) + .build(); + } } @Override @@ -69,45 +89,108 @@ protected BulkLoad ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) + { + if (allowExtraFieldsWhileCopying) + { + return buildLogicalPlanForIngestUsingCopy(resources); + } + else + { + return buildLogicalPlanForIngestUsingCopyAndInsert(resources); + } + } + + private LogicalPlan buildLogicalPlanForIngestUsingCopy(Resources resources) { List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); - // Digest Generation if (ingestMode().generateDigest()) 
{ - Value digestValue = DigestUdf - .builder() - .udfName(ingestMode().digestUdfName().orElseThrow(IllegalStateException::new)) - .addAllFieldNames(stagingDataset().schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) - .addAllValues(fieldsToSelect) - .build(); - String digestField = ingestMode().digestField().orElseThrow(IllegalStateException::new); - fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(digestField).build()); - fieldsToSelect.add(digestValue); + addDigest(fieldsToInsert, fieldsToSelect, fieldsToSelect); } if (ingestMode().auditing().accept(AUDIT_ENABLED)) { - BatchStartTimestamp batchStartTimestamp = BatchStartTimestamp.INSTANCE; - fieldsToSelect.add(batchStartTimestamp); - String auditField = ingestMode().auditing().accept(AuditingVisitors.EXTRACT_AUDIT_FIELD).orElseThrow(IllegalStateException::new); - fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(auditField).build()); + addAuditing(fieldsToInsert, fieldsToSelect); } if (ingestMode().lineageField().isPresent()) { - fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().lineageField().get()).build()); - List files = stagedFilesDataset.stagedFilesDatasetProperties().files(); - String lineageValue = String.join(",", files); - fieldsToSelect.add(StringValue.of(lineageValue)); + addLineage(fieldsToInsert, fieldsToSelect); } Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build(); return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert))); } + private LogicalPlan buildLogicalPlanForIngestUsingCopyAndInsert(Resources resources) + { + List operations = new ArrayList<>(); + + + // Operation 1: Copy into a temp table + List fieldsToSelectFromStage = 
LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); + List fieldsToInsertIntoTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); + Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelectFromStage).build(); + operations.add(Copy.of(mainDataset(), selectStage, fieldsToInsertIntoTemp)); + + + // Operation 2: Transfer from temp table into target table, adding extra columns at the same time + List fieldsToSelectFromTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); + List fieldsToInsertIntoMain = new ArrayList<>(mainDataset().schemaReference().fieldValues()); + + if (ingestMode().generateDigest()) + { + addDigest(fieldsToInsertIntoMain, fieldsToSelectFromTemp, fieldsToSelectFromStage); + } + + if (ingestMode().auditing().accept(AUDIT_ENABLED)) + { + addAuditing(fieldsToInsertIntoMain, fieldsToSelectFromTemp); + } + + if (ingestMode().lineageField().isPresent()) + { + addLineage(fieldsToInsertIntoMain, fieldsToSelectFromTemp); + } + + operations.add(Insert.of(mainDataset(), Selection.builder().source(tempDataset).addAllFields(fieldsToSelectFromTemp).build(), fieldsToInsertIntoMain)); + + + return LogicalPlan.of(operations); + } + + private void addDigest(List fieldsToInsert, List fieldsToSelect, List fieldsForDigestCalculation) + { + Value digestValue = DigestUdf + .builder() + .udfName(ingestMode().digestUdfName().orElseThrow(IllegalStateException::new)) + .addAllFieldNames(stagingDataset().schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) + .addAllValues(fieldsForDigestCalculation) + .build(); + String digestField = ingestMode().digestField().orElseThrow(IllegalStateException::new); + fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(digestField).build()); + fieldsToSelect.add(digestValue); + } + + private void addAuditing(List fieldsToInsert, List fieldsToSelect) 
+ { + BatchStartTimestamp batchStartTimestamp = BatchStartTimestamp.INSTANCE; + String auditField = ingestMode().auditing().accept(AuditingVisitors.EXTRACT_AUDIT_FIELD).orElseThrow(IllegalStateException::new); + fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(auditField).build()); + fieldsToSelect.add(batchStartTimestamp); + } + + private void addLineage(List fieldsToInsert, List fieldsToSelect) + { + List files = stagedFilesDataset.stagedFilesDatasetProperties().files(); + String lineageValue = String.join(",", files); + fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().lineageField().get()).build()); + fieldsToSelect.add(StringValue.of(lineageValue)); + } + @Override public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { @@ -117,6 +200,10 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { // TODO: Check if Create Stage is needed } + if (!allowExtraFieldsWhileCopying) + { + operations.add(Create.of(true, tempDataset)); + } return LogicalPlan.of(operations); } @@ -124,6 +211,21 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) public LogicalPlan buildLogicalPlanForPostActions(Resources resources) { List operations = new ArrayList<>(); + if (!allowExtraFieldsWhileCopying) + { + operations.add(Delete.builder().dataset(tempDataset).build()); + } + return LogicalPlan.of(operations); + } + + @Override + public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) + { + List operations = new ArrayList<>(); + if (!allowExtraFieldsWhileCopying) + { + operations.add(Drop.of(true, tempDataset, true)); + } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java index fe4d9638fba..12042820c56 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalDeltaPlanner.java @@ -68,9 +68,9 @@ class NontemporalDeltaPlanner extends Planner private final Optional dataSplitInRangeCondition; - NontemporalDeltaPlanner(Datasets datasets, NontemporalDelta ingestMode, PlannerOptions plannerOptions) + NontemporalDeltaPlanner(Datasets datasets, NontemporalDelta ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); // validate validatePrimaryKeysNotEmpty(primaryKeys); @@ -103,7 +103,7 @@ protected NontemporalDelta ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) { List operations = new ArrayList<>(); // Op1: Merge data from staging to main diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java index 
a0424f612f7..d5adad5f1d4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/NontemporalSnapshotPlanner.java @@ -42,22 +42,18 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.Optional; +import java.util.Set; -import static org.finos.legend.engine.persistence.components.common.StatisticName.INCOMING_RECORD_COUNT; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_DELETED; -import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; -import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_TERMINATED; -import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_UPDATED; import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.ALL_COLUMNS; import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.getPrimaryKeyMatchCondition; class NontemporalSnapshotPlanner extends Planner { - NontemporalSnapshotPlanner(Datasets datasets, NontemporalSnapshot ingestMode, PlannerOptions plannerOptions) + NontemporalSnapshotPlanner(Datasets datasets, NontemporalSnapshot ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); } @Override @@ -67,7 +63,7 @@ protected NontemporalSnapshot ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) { 
Dataset stagingDataset = stagingDataset(); List fieldsToSelect = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 1ec5ff31c1e..a2a6e1634fe 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -101,13 +101,15 @@ default boolean enableConcurrentSafety() private final Datasets datasets; private final IngestMode ingestMode; private final PlannerOptions plannerOptions; + protected final Set capabilities; protected final List primaryKeys; - Planner(Datasets datasets, IngestMode ingestMode, PlannerOptions plannerOptions) + Planner(Datasets datasets, IngestMode ingestMode, PlannerOptions plannerOptions, Set capabilities) { this.datasets = datasets; this.ingestMode = ingestMode; this.plannerOptions = plannerOptions == null ? 
PlannerOptions.builder().build() : plannerOptions; + this.capabilities = capabilities; this.primaryKeys = findCommonPrimaryKeysBetweenMainAndStaging(); } @@ -147,7 +149,7 @@ protected PlannerOptions options() return plannerOptions; } - public abstract LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities); + public abstract LogicalPlan buildLogicalPlanForIngest(Resources resources); public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java index c6949395af4..321c480b129 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java @@ -33,6 +33,9 @@ import org.finos.legend.engine.persistence.components.ingestmode.UnitemporalSnapshotAbstract; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoadAbstract; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.util.Capability; + +import java.util.Set; public class Planners { @@ -40,73 +43,75 @@ private Planners() { } - public static Planner get(Datasets datasets, IngestMode ingestMode) + public static Planner get(Datasets datasets, IngestMode ingestMode, Set capabilities) { - return ingestMode.accept(new 
PlannerFactory(datasets, PlannerOptions.builder().build())); + return ingestMode.accept(new PlannerFactory(datasets, PlannerOptions.builder().build(), capabilities)); } - public static Planner get(Datasets datasets, IngestMode ingestMode, PlannerOptions plannerOptions) + public static Planner get(Datasets datasets, IngestMode ingestMode, PlannerOptions plannerOptions, Set capabilities) { - return ingestMode.accept(new PlannerFactory(datasets, plannerOptions)); + return ingestMode.accept(new PlannerFactory(datasets, plannerOptions, capabilities)); } static class PlannerFactory implements IngestModeVisitor { private final Datasets datasets; private final PlannerOptions plannerOptions; + private final Set capabilities; - PlannerFactory(Datasets datasets, PlannerOptions plannerOptions) + PlannerFactory(Datasets datasets, PlannerOptions plannerOptions, Set capabilities) { this.datasets = datasets; this.plannerOptions = plannerOptions; + this.capabilities = capabilities; } @Override public Planner visitAppendOnly(AppendOnlyAbstract appendOnly) { - return new AppendOnlyPlanner(datasets, (AppendOnly) appendOnly, plannerOptions); + return new AppendOnlyPlanner(datasets, (AppendOnly) appendOnly, plannerOptions, capabilities); } @Override public Planner visitNontemporalSnapshot(NontemporalSnapshotAbstract nontemporalSnapshot) { - return new NontemporalSnapshotPlanner(datasets, (NontemporalSnapshot) nontemporalSnapshot, plannerOptions); + return new NontemporalSnapshotPlanner(datasets, (NontemporalSnapshot) nontemporalSnapshot, plannerOptions, capabilities); } @Override public Planner visitNontemporalDelta(NontemporalDeltaAbstract nontemporalDelta) { - return new NontemporalDeltaPlanner(datasets, (NontemporalDelta) nontemporalDelta, plannerOptions); + return new NontemporalDeltaPlanner(datasets, (NontemporalDelta) nontemporalDelta, plannerOptions, capabilities); } @Override public Planner visitUnitemporalSnapshot(UnitemporalSnapshotAbstract unitemporalSnapshot) { - return new 
UnitemporalSnapshotPlanner(datasets, (UnitemporalSnapshot) unitemporalSnapshot, plannerOptions); + return new UnitemporalSnapshotPlanner(datasets, (UnitemporalSnapshot) unitemporalSnapshot, plannerOptions, capabilities); } @Override public Planner visitUnitemporalDelta(UnitemporalDeltaAbstract unitemporalDelta) { - return new UnitemporalDeltaPlanner(datasets, (UnitemporalDelta) unitemporalDelta, plannerOptions); + return new UnitemporalDeltaPlanner(datasets, (UnitemporalDelta) unitemporalDelta, plannerOptions, capabilities); } @Override public Planner visitBitemporalSnapshot(BitemporalSnapshotAbstract bitemporalSnapshot) { - return new BitemporalSnapshotPlanner(datasets, (BitemporalSnapshot) bitemporalSnapshot, plannerOptions); + return new BitemporalSnapshotPlanner(datasets, (BitemporalSnapshot) bitemporalSnapshot, plannerOptions, capabilities); } @Override public Planner visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) { - return new BitemporalDeltaPlanner(datasets, (BitemporalDelta) bitemporalDelta, plannerOptions); + return new BitemporalDeltaPlanner(datasets, (BitemporalDelta) bitemporalDelta, plannerOptions, capabilities); } @Override public Planner visitBulkLoad(BulkLoadAbstract bulkLoad) { - return new BulkLoadPlanner(datasets, (BulkLoad) bulkLoad, plannerOptions); + return new BulkLoadPlanner(datasets, (BulkLoad) bulkLoad, plannerOptions, capabilities); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java index b93ef293767..58cf0f92144 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalDeltaPlanner.java @@ -64,9 +64,9 @@ class UnitemporalDeltaPlanner extends UnitemporalPlanner private final Optional deleteIndicatorIsSetCondition; private final Optional dataSplitInRangeCondition; - UnitemporalDeltaPlanner(Datasets datasets, UnitemporalDelta ingestMode, PlannerOptions plannerOptions) + UnitemporalDeltaPlanner(Datasets datasets, UnitemporalDelta ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); // Validate if the optimizationFilters are comparable if (!ingestMode.optimizationFilters().isEmpty()) @@ -98,7 +98,7 @@ protected UnitemporalDelta ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) { List operations = new ArrayList<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java index 4b43d78ce4c..3b021655afc 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalPlanner.java @@ -46,6 +46,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.DiffBinaryValueOperator; +import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataUtils; @@ -57,6 +58,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_UPDATED; @@ -76,13 +78,14 @@ abstract class UnitemporalPlanner extends Planner protected Condition primaryKeysMatchCondition; - UnitemporalPlanner(Datasets datasets, TransactionMilestoned transactionMilestoned, PlannerOptions plannerOptions) + UnitemporalPlanner(Datasets datasets, TransactionMilestoned transactionMilestoned, PlannerOptions plannerOptions, Set capabilities) { super(datasets.metadataDataset().isPresent() ? 
datasets : datasets.withMetadataDataset(MetadataDataset.builder().build()), transactionMilestoned, - plannerOptions); + plannerOptions, + capabilities); // validate validatePrimaryKeysNotEmpty(primaryKeys); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index 019adbbffc5..007e3f3054e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -49,9 +49,9 @@ class UnitemporalSnapshotPlanner extends UnitemporalPlanner { - UnitemporalSnapshotPlanner(Datasets datasets, UnitemporalSnapshot ingestMode, PlannerOptions plannerOptions) + UnitemporalSnapshotPlanner(Datasets datasets, UnitemporalSnapshot ingestMode, PlannerOptions plannerOptions, Set capabilities) { - super(datasets, ingestMode, plannerOptions); + super(datasets, ingestMode, plannerOptions, capabilities); // validate if (ingestMode.partitioned()) @@ -72,7 +72,7 @@ protected UnitemporalSnapshot ingestMode() } @Override - public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set capabilities) + public LogicalPlan buildLogicalPlanForIngest(Resources resources) { List> keyValuePairs = keyValuesForMilestoningUpdate(); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java index 16acfd6bd94..409c73a2922 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java @@ -22,4 +22,5 @@ public enum Capability EXPLICIT_DATA_TYPE_CONVERSION, DATA_TYPE_LENGTH_CHANGE, DATA_TYPE_SCALE_CHANGE, + ALLOW_EXTRA_FIELDS_WHILE_COPYING; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index b33305e76fd..b15c4cf8976 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -174,7 +174,7 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources) Datasets datasetsWithCaseConversion = ApiUtils.enrichAndApplyCase(datasets, caseConversion()); Dataset enrichedMainDataset = ApiUtils.deriveMainDatasetFromStaging(datasetsWithCaseConversion, ingestModeWithCaseConversion); Datasets enrichedDatasets = datasetsWithCaseConversion.withMainDataset(enrichedMainDataset); - Planner planner = Planners.get(enrichedDatasets, ingestModeWithCaseConversion, plannerOptions()); + Planner planner = Planners.get(enrichedDatasets, ingestModeWithCaseConversion, plannerOptions(), relationalSink().capabilities()); return generateOperations(enrichedDatasets, resources, planner, ingestModeWithCaseConversion); } @@ -225,11 +225,11 @@ GeneratorResult generateOperations(Datasets datasets, Resources resources, Plann schemaEvolutionDataset = Optional.of(schemaEvolutionResult.evolvedDataset()); // update main dataset with evolved schema and re-initialize planner - planner = Planners.get(datasets.withMainDataset(schemaEvolutionDataset.get()), ingestMode, plannerOptions()); + planner = Planners.get(datasets.withMainDataset(schemaEvolutionDataset.get()), ingestMode, plannerOptions(), relationalSink().capabilities()); } // ingest - LogicalPlan ingestLogicalPlan = planner.buildLogicalPlanForIngest(resources, relationalSink().capabilities()); + LogicalPlan ingestLogicalPlan = planner.buildLogicalPlanForIngest(resources); SqlPlan ingestSqlPlan = transformer.generatePhysicalPlan(ingestLogicalPlan); // metadata-ingest diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 5f6f605a0ef..65ea731f606 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -464,7 +464,7 @@ private void init(Datasets datasets) .batchIdPattern(BATCH_ID_PATTERN) .build(); - planner = Planners.get(enrichedDatasets, enrichedIngestMode, plannerOptions()); + planner = Planners.get(enrichedDatasets, enrichedIngestMode, plannerOptions(), relationalSink().capabilities()); generatorResult = generator.generateOperations(enrichedDatasets, resourcesBuilder.build(), planner, enrichedIngestMode); } @@ -533,8 +533,10 @@ private List performBulkLoad(Datasets datasets, Transformer, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index b701708aa45..a1061edce23 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -109,6 +109,7 @@ public class SnowflakeSink extends AnsiSqlSink capabilities.add(Capability.ADD_COLUMN); capabilities.add(Capability.IMPLICIT_DATA_TYPE_CONVERSION); capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); + capabilities.add(Capability.ALLOW_EXTRA_FIELDS_WHILE_COPYING); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java index 4a81ebce332..979a71ccd76 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-test/src/test/java/org/finos/legend/engine/persistence/components/testcases/ingestmode/nontemporal/NontemporalSnapshotTestCases.java @@ -203,7 +203,7 @@ public void testNontemporalSnapshotWithDropStagingData() TestScenario testScenario = scenarios.NO_AUDTING__NO_DATASPLIT(); PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); Resources resources = Resources.builder().externalDatasetImported(true).build(); - Planner planner = Planners.get(testScenario.getDatasets(), testScenario.getIngestMode(), options); + Planner planner = Planners.get(testScenario.getDatasets(), testScenario.getIngestMode(), options, getRelationalSink().capabilities()); RelationalTransformer transformer = new RelationalTransformer(getRelationalSink()); // post actions From c82272b287f22eebf75c1df6784b53bd6d94daa3 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Tue, 12 Sep 2023 16:31:34 +0800 Subject: [PATCH 35/57] Add Support for metadata for BulkLoad Task --- .../components/common/DatasetsAbstract.java | 3 + .../ingestmode/BulkLoadAbstract.java | 4 +- .../DeriveMainDatasetSchemaFromStaging.java | 15 +- .../ingestmode/IngestModeCaseConverter.java | 2 +- .../ingestmode/digest/DigestGenStrategy.java | 21 ++ .../digest/DigestGenStrategyVisitor.java | 22 ++ .../digest/NoDigestGenStrategyAbstract.java | 33 +++ .../UDFBasedDigestGenStrategyAbstract.java | 34 +++ .../datasets/DatasetCaseConverter.java | 16 ++ .../datasets/DatasetsCaseConverter.java | 3 + .../values/BatchInsertTimestampAbstract.java | 31 +++ .../components/planner/BulkLoadPlanner.java | 56 ++++- .../components/planner/Planner.java | 10 + .../components/planner/Planners.java | 9 + .../AppendLogMetadataDatasetAbstract.java | 107 +++++++++ .../util/AppendLogMetadataUtils.java | 85 ++++++++ .../components/util/LogicalPlanUtils.java | 2 + .../visitors/BatchInsertTimestampVisitor.java | 32 
+++ .../util/AppendLogDatasetUtilsAnsiTest.java | 40 ++++ .../util/AppendLogDatasetUtilsTest.java | 87 ++++++++ .../visitor/BatchInsertTimestampVisitor.java | 32 +++ .../AppendLogDatasetUtilsBigQueryTest.java | 41 ++++ .../components/relational/api/ApiUtils.java | 23 +- .../api/RelationalGeneratorAbstract.java | 13 ++ .../api/RelationalIngestorAbstract.java | 8 + .../persistence/components/BaseTest.java | 10 + .../ingestmode/bulkload/BulkLoadTest.java | 205 ++++++++---------- .../bulk-load/expected/expected_table1.csv | 6 +- .../bulk-load/expected/expected_table2.csv | 6 +- .../bulk-load/expected/expected_table3.csv | 6 +- .../bulk-load/expected/expected_table4.csv | 6 +- .../bulk-load/expected/expected_table5.csv | 3 - .../data/bulk-load/input/staged_file5.csv | 3 - .../components/ingestmode/BulkLoadTest.java | 43 ++-- .../AppendLogDatasetUtilsSnowflakeTest.java | 40 ++++ 35 files changed, 873 insertions(+), 184 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategy.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/NoDigestGenStrategyAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BatchInsertTimestampAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataDatasetAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataUtils.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BatchInsertTimestampVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/BatchInsertTimestampVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java delete mode 
100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java index 847c88f16c6..d8c58c6bb86 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java @@ -15,6 +15,7 @@ package org.finos.legend.engine.persistence.components.common; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import 
org.immutables.value.Value.Immutable; @@ -41,6 +42,8 @@ public interface DatasetsAbstract Optional metadataDataset(); + Optional appendLogMetadataDataset(); + Optional tempDataset(); Optional tempDatasetWithDeleteIndicator(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java index 493def6e34e..01b34c0c82d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java @@ -28,14 +28,14 @@ ) public interface BulkLoadAbstract extends IngestMode { + String batchIdField(); + boolean generateDigest(); Optional digestUdfName(); Optional digestField(); - Optional lineageField(); - Auditing auditing(); @Override diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index 6c4840d90e9..e0f8dd81908 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -146,16 +146,13 @@ public Dataset visitBulkLoad(BulkLoadAbstract bulkLoad) { addDigestField(mainSchemaFields, bulkLoad.digestField().get()); } + Field batchIdField = Field.builder() + .name(bulkLoad.batchIdField()) + .type(FieldType.of(DataType.VARCHAR, Optional.empty(), Optional.empty())) + .primaryKey(false) + .build(); + mainSchemaFields.add(batchIdField); bulkLoad.auditing().accept(new EnrichSchemaWithAuditing(mainSchemaFields, false)); - if (bulkLoad.lineageField().isPresent()) - { - Field lineageField = Field.builder() - .name(bulkLoad.lineageField().get()) - .type(FieldType.of(DataType.VARCHAR, Optional.empty(), Optional.empty())) - .primaryKey(false) - .build(); - mainSchemaFields.add(lineageField); - } return mainDatasetDefinitionBuilder.schema(mainSchemaDefinitionBuilder.addAllFields(mainSchemaFields).build()).build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java index 8770d654fcd..c024e09769a 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java @@ -160,10 +160,10 @@ public IngestMode visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) public IngestMode visitBulkLoad(BulkLoadAbstract bulkLoad) { return BulkLoad.builder() + .batchIdField(applyCase(bulkLoad.batchIdField())) .digestField(applyCase(bulkLoad.digestField())) .digestUdfName(bulkLoad.digestUdfName()) .generateDigest(bulkLoad.generateDigest()) - .lineageField(applyCase(bulkLoad.lineageField())) .auditing(bulkLoad.auditing().accept(new AuditingCaseConverter())) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategy.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategy.java new file mode 100644 index 00000000000..94659e957cf --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategy.java @@ -0,0 +1,21 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.ingestmode.digest; + + +public interface DigestGenStrategy +{ + T accept(DigestGenStrategyVisitor visitor); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java new file mode 100644 index 00000000000..a45a2e91735 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/DigestGenStrategyVisitor.java @@ -0,0 +1,22 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.digest; + +public interface DigestGenStrategyVisitor +{ + T visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStrategy); + + T visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/NoDigestGenStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/NoDigestGenStrategyAbstract.java new file mode 100644 index 00000000000..59832960ac0 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/NoDigestGenStrategyAbstract.java @@ -0,0 +1,33 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.digest; + +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface NoDigestGenStrategyAbstract extends DigestGenStrategy +{ + default T accept(DigestGenStrategyVisitor visitor) + { + return visitor.visitNoDigestGenStrategy(this); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java new file mode 100644 index 00000000000..2fdee7a6559 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java @@ -0,0 +1,34 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.ingestmode.digest; + +import org.immutables.value.Value; + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface UDFBasedDigestGenStrategyAbstract extends DigestGenStrategy +{ + @Override + default T accept(DigestGenStrategyVisitor visitor) + { + return visitor.visitUDFBasedDigestGenStrategy(this); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java index 9f118acdeeb..6e7deb1adbf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.logicalplan.datasets; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; @@ -151,6 +152,21 @@ public MetadataDataset applyCaseOnMetadataDataset(MetadataDataset metadataDatase .build(); } + public AppendLogMetadataDataset 
applyCaseOnAppendLogMetadataDataset(AppendLogMetadataDataset appendLogMetadataDataset, Function strategy) + { + return AppendLogMetadataDataset.builder() + .database(appendLogMetadataDataset.database().map(strategy)) + .group(appendLogMetadataDataset.group().map(strategy)) + .name(strategy.apply(appendLogMetadataDataset.name())) + .batchIdField(strategy.apply(appendLogMetadataDataset.batchIdField())) + .tableNameField(strategy.apply(appendLogMetadataDataset.tableNameField())) + .batchStartTimeField(strategy.apply(appendLogMetadataDataset.batchStartTimeField())) + .batchEndTimeField(strategy.apply(appendLogMetadataDataset.batchEndTimeField())) + .batchStatusField(strategy.apply(appendLogMetadataDataset.batchStatusField())) + .batchSourceInfoField(strategy.apply(appendLogMetadataDataset.batchSourceInfoField())) + .build(); + } + public LockInfoDataset applyCaseOnLockInfoDataset(LockInfoDataset lockInfoDataset, Function strategy) { return LockInfoDataset.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java index ca6d79e8d38..2fb6720c69c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java @@ -15,6 +15,7 @@ package 
org.finos.legend.engine.persistence.components.logicalplan.datasets; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; @@ -33,6 +34,7 @@ public Datasets applyCase(Datasets datasets, Function strategy) Optional tempWithDeleteIndicator = datasets.tempDatasetWithDeleteIndicator().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional stagingWithoutDuplicates = datasets.stagingDatasetWithoutDuplicates().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional metadata = Optional.ofNullable(datasetCaseConverter.applyCaseOnMetadataDataset(datasets.metadataDataset().orElseThrow(IllegalStateException::new), strategy)); + Optional appendLogMetadataDataset = Optional.ofNullable(datasetCaseConverter.applyCaseOnAppendLogMetadataDataset(datasets.appendLogMetadataDataset().orElseThrow(IllegalStateException::new), strategy)); Optional lockInfo = Optional.ofNullable(datasetCaseConverter.applyCaseOnLockInfoDataset(datasets.lockInfoDataset().orElseThrow(IllegalStateException::new), strategy)); return Datasets.builder() @@ -42,6 +44,7 @@ public Datasets applyCase(Datasets datasets, Function strategy) .tempDatasetWithDeleteIndicator(tempWithDeleteIndicator) .stagingDatasetWithoutDuplicates(stagingWithoutDuplicates) .metadataDataset(metadata) + .appendLogMetadataDataset(appendLogMetadataDataset) .lockInfoDataset(lockInfo) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BatchInsertTimestampAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BatchInsertTimestampAbstract.java new file mode 100644 index 00000000000..e092112f46b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BatchInsertTimestampAbstract.java @@ -0,0 +1,31 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.logicalplan.values; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface BatchInsertTimestampAbstract extends Value +{ + BatchInsertTimestamp INSTANCE = BatchInsertTimestamp.builder().build(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 4fffec17bcc..cb14f17a852 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.planner; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.Resources; import org.finos.legend.engine.persistence.components.common.StatisticName; @@ -25,6 +27,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import 
org.finos.legend.engine.persistence.components.logicalplan.values.All; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; @@ -32,10 +35,13 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataUtils; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; @@ -49,6 +55,8 @@ class BulkLoadPlanner extends Planner private StagedFilesDataset stagedFilesDataset; + private AppendLogMetadataDataset appendLogMetadataDataset; + BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions) { super(datasets, ingestMode, plannerOptions); @@ -60,6 +68,7 @@ class BulkLoadPlanner extends Planner } stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); + appendLogMetadataDataset = appendLogMetadataDataset().orElseThrow(IllegalStateException::new); } @Override @@ -88,6 +97,10 @@ 
public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set files = stagedFilesDataset.stagedFilesDatasetProperties().files(); - String lineageValue = String.join(",", files); - fieldsToSelect.add(StringValue.of(lineageValue)); - } - Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build(); return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert))); } @@ -113,10 +118,7 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { List operations = new ArrayList<>(); operations.add(Create.of(true, mainDataset())); - if (options().createStagingDataset()) - { - // TODO: Check if Create Stage is needed - } + operations.add(Create.of(true, appendLogMetadataDataset.get())); return LogicalPlan.of(operations); } @@ -127,6 +129,20 @@ public LogicalPlan buildLogicalPlanForPostActions(Resources resources) return LogicalPlan.of(operations); } + @Override + public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) + { + AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); + String batchSourceInfo = jsonifyBatchSourceInfo(stagedFilesDataset.stagedFilesDatasetProperties().files()); + + StringValue appendDatasetName = StringValue.of(mainDataset().datasetReference().name()); + StringValue batchIdValue = StringValue.of(options().appendBatchIdValue().orElseThrow(IllegalStateException::new)); + StringValue appendBatchStatusPattern = StringValue.of(options().appendBatchStatusPattern().orElseThrow(IllegalStateException::new)); + + Insert insertMetaData = appendLogMetadataUtils.insertMetaData(batchIdValue, appendDatasetName, BatchStartTimestamp.INSTANCE, BatchEndTimestamp.INSTANCE, appendBatchStatusPattern, StringValue.of(batchSourceInfo)); + return LogicalPlan.of(Arrays.asList(insertMetaData)); + } + @Override public void addPostRunStatsForRowsInserted(Map postRunStatisticsResult) { @@ -154,4 
+170,20 @@ private Selection getRowsBasedOnAppendTimestamp(Dataset dataset, String field, S FunctionImpl countFunction = FunctionImpl.builder().functionName(FunctionName.COUNT).addValue(All.INSTANCE).alias(alias).build(); return Selection.builder().source(dataset.datasetReference()).condition(condition).addFields(countFunction).build(); } + + public static String jsonifyBatchSourceInfo(List files) + { + Map batchSourceMap = new HashMap(); + batchSourceMap.put("files", files); + ObjectMapper objectMapper = new ObjectMapper(); + try + { + return objectMapper.writeValueAsString(batchSourceMap); + } + catch (JsonProcessingException e) + { + throw new RuntimeException(e); + } + } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 1ec5ff31c1e..d8ba78d7531 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -26,6 +26,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; @@ -96,6 +97,10 @@ default boolean enableConcurrentSafety() { return false; } + + Optional appendBatchIdValue(); + + Optional appendBatchStatusPattern(); } private final Datasets datasets; @@ -132,6 +137,11 @@ protected Optional metadataDataset() return datasets.metadataDataset(); } + protected Optional appendLogMetadataDataset() + { + return datasets.appendLogMetadataDataset(); + } + protected Optional lockInfoDataset() { return datasets.lockInfoDataset(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java index c6949395af4..48641976f0c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java @@ -106,6 +106,15 @@ public Planner visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) @Override public Planner visitBulkLoad(BulkLoadAbstract bulkLoad) { + // Validation: + if (!plannerOptions.appendBatchIdValue().isPresent()) + { + throw new IllegalArgumentException("appendBatchIdValue is mandatory for BulkLoad Ingest mode"); + } + if (!plannerOptions.appendBatchStatusPattern().isPresent()) + { 
+ throw new IllegalArgumentException("appendBatchStatusPattern is mandatory for BulkLoad Ingest mode"); + } return new BulkLoadPlanner(datasets, (BulkLoad) bulkLoad, plannerOptions); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataDatasetAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataDatasetAbstract.java new file mode 100644 index 00000000000..7798d13a34b --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataDatasetAbstract.java @@ -0,0 +1,107 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; + +import java.util.Optional; + +import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.DEFAULT_APPENDLOG_META_TABLE; +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; +import static org.immutables.value.Value.Default; +import static org.immutables.value.Value.Derived; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface AppendLogMetadataDatasetAbstract +{ + + Optional database(); + + Optional group(); + + @Default + default String name() + { + return DEFAULT_APPENDLOG_META_TABLE; + } + + @Default + default String batchIdField() + { + return "batch_id"; + } + + @Default + default String tableNameField() + { + return "table_name"; + } + + @Default + default String batchStartTimeField() + { + return "batch_start_ts_utc"; + } + + @Default + default String batchEndTimeField() + { + return "batch_end_ts_utc"; + } + + @Default + default String batchStatusField() + { + return "batch_status"; + } + + @Default + default String batchSourceInfoField() + { + return "batch_source_info"; + } + + + @Derived + default Dataset get() + { + return DatasetDefinition.builder() + .database(database()) + .group(group()) + .name(name()) + .schema(SchemaDefinition.builder() + 
.addFields(Field.builder().name(batchIdField()).type(FieldType.of(DataType.VARCHAR, 255, null)).build()) + .addFields(Field.builder().name(tableNameField()).type(FieldType.of(DataType.VARCHAR, 255, null)).build()) + .addFields(Field.builder().name(batchStartTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) + .addFields(Field.builder().name(batchEndTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) + .addFields(Field.builder().name(batchStatusField()).type(FieldType.of(DataType.VARCHAR, 32, null)).build()) + .addFields(Field.builder().name(batchSourceInfoField()).type(FieldType.of(DataType.JSON, Optional.empty(), Optional.empty())).build()) + .build()) + .build(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataUtils.java new file mode 100644 index 00000000000..08198a0bb36 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataUtils.java @@ -0,0 +1,85 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetReference; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; +import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; +import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; + +import java.util.ArrayList; +import java.util.List; + +public class AppendLogMetadataUtils +{ + private final AppendLogMetadataDataset appendLogMetadataDataset; + private final Dataset dataset; + + public AppendLogMetadataUtils(AppendLogMetadataDataset appendLogMetadataDataset) + { + this.appendLogMetadataDataset = appendLogMetadataDataset; + this.dataset = appendLogMetadataDataset.get(); + } + + /* + INSERT INTO batch_metadata ("batchIdField", "tableNameField", "batchStartTimeField", "batchEndTimeField", + "batchStatusField","batchSourceInfoField") + (SELECT 
'','','{BATCH_START_TIMESTAMP_PLACEHOLDER}','{BATCH_END_TIMESTAMP_PLACEHOLDER}', + '',''); + */ + public Insert insertMetaData(StringValue batchIdValue, StringValue appendLogTableName, + BatchStartTimestamp batchStartTimestamp, BatchEndTimestamp batchEndTimestamp, + StringValue batchStatusValue, StringValue batchSourceInfoValue) + { + DatasetReference metaTableRef = this.dataset.datasetReference(); + FieldValue batchId = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchIdField()).build(); + FieldValue tableName = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.tableNameField()).build(); + + FieldValue batchStartTs = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchStartTimeField()).build(); + FieldValue batchEndTs = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchEndTimeField()).build(); + + FieldValue batchStatus = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchStatusField()).build(); + FieldValue batchSourceInfo = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchSourceInfoField()).build(); + + List metaInsertFields = new ArrayList<>(); + List metaSelectFields = new ArrayList<>(); + + metaInsertFields.add(batchId); + metaSelectFields.add(batchIdValue); + + metaInsertFields.add(tableName); + metaSelectFields.add(appendLogTableName); + + metaInsertFields.add(batchStartTs); + metaSelectFields.add(batchStartTimestamp); + + metaInsertFields.add(batchEndTs); + metaSelectFields.add(batchEndTimestamp); + + metaInsertFields.add(batchStatus); + metaSelectFields.add(batchStatusValue); + + metaInsertFields.add(batchSourceInfo); + metaSelectFields.add(ParseJsonFunction.builder().jsonString(batchSourceInfoValue).build()); + + return Insert.of(dataset, Selection.builder().addAllFields(metaSelectFields).build(), metaInsertFields); + } +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index 0848758fb7b..504720e5212 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -74,6 +74,8 @@ public class LogicalPlanUtils { public static final String INFINITE_BATCH_TIME = "9999-12-31 23:59:59"; public static final String DEFAULT_META_TABLE = "batch_metadata"; + public static final String DEFAULT_APPENDLOG_META_TABLE = "appendlog_batch_metadata"; + public static final String DATA_SPLIT_LOWER_BOUND_PLACEHOLDER = "{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}"; public static final String DATA_SPLIT_UPPER_BOUND_PLACEHOLDER = "{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}"; public static final String UNDERSCORE = "_"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BatchInsertTimestampVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BatchInsertTimestampVisitor.java new file mode 100644 index 00000000000..89da82d33d7 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BatchInsertTimestampVisitor.java @@ -0,0 +1,32 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors; + +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchInsertTimestamp; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestampAbstract; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + + +public class BatchInsertTimestampVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, BatchInsertTimestamp current, VisitorContext context) + { + return new BatchStartTimestampVisitor().visit(prev, BatchStartTimestampAbstract.INSTANCE, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java new file mode 100644 index 00000000000..06cb7d2b807 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java @@ -0,0 +1,40 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.relational.RelationalSink; +import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; + +public class AppendLogDatasetUtilsAnsiTest extends AppendLogDatasetUtilsTest +{ + + public String getExpectedSqlForAppendMetadata() + { + return "INSERT INTO appendlog_batch_metadata " + + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\")" + + " (SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + } + + public String getExpectedSqlForAppendMetadataUpperCase() + { + return "INSERT INTO APPENDLOG_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + + "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + } + + public RelationalSink getRelationalSink() + { + return AnsiSqlSink.get(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java new file mode 100644 index 00000000000..b16c2eeb22c --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java @@ -0,0 +1,87 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, 
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; +import org.finos.legend.engine.persistence.components.logicalplan.values.*; +import org.finos.legend.engine.persistence.components.relational.RelationalSink; +import org.finos.legend.engine.persistence.components.relational.SqlPlan; +import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; +import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; +import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; +import org.finos.legend.engine.persistence.components.transformer.TransformOptions; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.time.Clock; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.List; + +public abstract class AppendLogDatasetUtilsTest +{ + + private final ZonedDateTime executionZonedDateTime = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); + private final TransformOptions transformOptions = TransformOptions.builder().executionTimestampClock(Clock.fixed(executionZonedDateTime.toInstant(), ZoneOffset.UTC)).build(); + + private AppendLogMetadataDataset appendLogMetadataDataset = 
AppendLogMetadataDataset.builder().build(); + + + @Test + public void testInsertAppendMetadata() + { + AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); + StringValue batchIdValue = StringValue.of("batch_id_123"); + StringValue appendLogTableName = StringValue.of("appeng_log_table_name"); + StringValue batchStatusValue = StringValue.of(IngestStatus.SUCCEEDED.toString()); + StringValue batchLineageValue = StringValue.of("my_lineage_value"); + Insert operation = appendLogMetadataUtils.insertMetaData(batchIdValue, appendLogTableName, BatchStartTimestamp.INSTANCE, + BatchEndTimestampAbstract.INSTANCE, batchStatusValue, batchLineageValue); + + RelationalTransformer transformer = new RelationalTransformer(getRelationalSink(), transformOptions); + LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + String expectedSql = getExpectedSqlForAppendMetadata(); + Assertions.assertEquals(expectedSql, list.get(0)); + } + + public abstract String getExpectedSqlForAppendMetadata(); + + @Test + public void testInsertAppendMetadataInUpperCase() + { + AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); + StringValue batchIdValue = StringValue.of("batch_id_123"); + StringValue appendLogTableName = StringValue.of("appeng_log_table_name"); + StringValue batchStatusValue = StringValue.of(IngestStatus.SUCCEEDED.toString()); + StringValue batchLineageValue = StringValue.of("my_lineage_value"); + + Insert operation = appendLogMetadataUtils.insertMetaData(batchIdValue, appendLogTableName, + BatchStartTimestamp.INSTANCE, BatchEndTimestampAbstract.INSTANCE, batchStatusValue, batchLineageValue); + + RelationalTransformer transformer = new RelationalTransformer(getRelationalSink(), transformOptions.withOptimizers(new UpperCaseOptimizer())); + LogicalPlan 
logicalPlan = LogicalPlan.builder().addOps(operation).build(); + SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); + List list = physicalPlan.getSqlList(); + String expectedSql = getExpectedSqlForAppendMetadataUpperCase(); + Assertions.assertEquals(expectedSql, list.get(0)); + } + + public abstract String getExpectedSqlForAppendMetadataUpperCase(); + + public abstract RelationalSink getRelationalSink(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/BatchInsertTimestampVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/BatchInsertTimestampVisitor.java new file mode 100644 index 00000000000..054a92bd2dd --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/BatchInsertTimestampVisitor.java @@ -0,0 +1,32 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchInsertTimestamp; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestampAbstract; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + + +public class BatchInsertTimestampVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, BatchInsertTimestamp current, VisitorContext context) + { + return new BatchStartTimestampVisitor().visit(prev, BatchStartTimestampAbstract.INSTANCE, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java new file mode 100644 index 00000000000..82bd262d59e --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java @@ -0,0 +1,41 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.relational.RelationalSink; +import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; + +public class AppendLogDatasetUtilsBigQueryTest extends AppendLogDatasetUtilsTest +{ + + public String getExpectedSqlForAppendMetadata() + { + return "INSERT INTO appendlog_batch_metadata " + + "(`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + + "(SELECT 'batch_id_123','appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + } + + public String getExpectedSqlForAppendMetadataUpperCase() + { + return "INSERT INTO APPENDLOG_BATCH_METADATA " + + "(`BATCH_ID`, `TABLE_NAME`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`, `BATCH_SOURCE_INFO`) " + + "(SELECT 'batch_id_123','appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + } + + public RelationalSink getRelationalSink() + { + return BigQuerySink.get(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java index 7232fa61be9..bb0ff837db3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java @@ -22,6 +22,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsCaseConverter; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.relational.CaseConversion; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; @@ -45,9 +46,13 @@ public static Dataset deriveMainDatasetFromStaging(Datasets datasets, IngestMode public static Datasets enrichAndApplyCase(Datasets datasets, CaseConversion caseConversion) { DatasetsCaseConverter converter = new DatasetsCaseConverter(); - MetadataDataset metadataDataset = getMetadataDataset(datasets); + MetadataDataset metadataDataset = datasets.metadataDataset().orElse(MetadataDataset.builder().build()); + AppendLogMetadataDataset appendLogMetadataDataset = datasets.appendLogMetadataDataset().orElse(AppendLogMetadataDataset.builder().build()); LockInfoDataset lockInfoDataset = getLockInfoDataset(datasets); - Datasets enrichedDatasets = datasets.withMetadataDataset(metadataDataset).withLockInfoDataset(lockInfoDataset); + Datasets enrichedDatasets = datasets 
+ .withMetadataDataset(metadataDataset) + .withLockInfoDataset(lockInfoDataset) + .withAppendLogMetadataDataset(appendLogMetadataDataset); if (caseConversion == CaseConversion.TO_UPPER) { return converter.applyCase(enrichedDatasets, String::toUpperCase); @@ -72,20 +77,6 @@ public static IngestMode applyCase(IngestMode ingestMode, CaseConversion caseCon return ingestMode; } - private static MetadataDataset getMetadataDataset(Datasets datasets) - { - MetadataDataset metadataset; - if (datasets.metadataDataset().isPresent()) - { - metadataset = datasets.metadataDataset().get(); - } - else - { - metadataset = MetadataDataset.builder().build(); - } - return metadataset; - } - private static LockInfoDataset getLockInfoDataset(Datasets datasets) { Dataset main = datasets.mainDataset(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index b33305e76fd..0200b626a98 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -56,6 +56,9 @@ ) public abstract class RelationalGeneratorAbstract { + + public static final String APPEND_BATCH_STATUS_PATTERN = "{APPEND_BATCH_STATUS_PLACEHOLDER}"; + //---------- FLAGS ---------- @Default 
@@ -110,6 +113,14 @@ public boolean enableConcurrentSafety() public abstract Optional infiniteBatchIdValue(); + public abstract Optional appendBatchIdValue(); + + @Default + public String appendBatchStatusPattern() + { + return APPEND_BATCH_STATUS_PATTERN; + } + //---------- FIELDS ---------- public abstract IngestMode ingestMode(); @@ -125,6 +136,8 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) + .appendBatchIdValue(appendBatchIdValue()) + .appendBatchStatusPattern(appendBatchStatusPattern()) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 5f6f605a0ef..dfb4bbfe687 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -79,6 +79,7 @@ import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.TABLE_IS_NON_EMPTY; +import 
static org.finos.legend.engine.persistence.components.relational.api.RelationalGeneratorAbstract.APPEND_BATCH_STATUS_PATTERN; import static org.finos.legend.engine.persistence.components.transformer.Transformer.TransformOptionsAbstract.DATE_TIME_FORMATTER; @Immutable @@ -155,6 +156,8 @@ public Set schemaEvolutionCapabilitySet() return Collections.emptySet(); } + public abstract Optional appendBatchIdValue(); + //---------- FIELDS ---------- public abstract IngestMode ingestMode(); @@ -170,6 +173,8 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) + .appendBatchIdValue(appendBatchIdValue()) + .appendBatchStatusPattern(APPEND_BATCH_STATUS_PATTERN) .build(); } @@ -531,6 +536,9 @@ private List performBulkLoad(Datasets datasets, Transformer expectedStats, boolean verifyStagingFilters) throws Exception + { + // Execute physical plans IngestorResult result = ingestor.performFullIngestion(JdbcConnection.of(h2Sink.connection()), datasets); Map actualStats = result.statisticByName(); @@ -269,7 +275,11 @@ public IngestorResult executePlansAndVerifyForCaseConversion(IngestMode ingestMo .schemaEvolutionCapabilitySet(Collections.emptySet()) .caseConversion(CaseConversion.TO_UPPER) .build(); + return executePlansAndVerifyForCaseConversion(ingestor, datasets, schema, expectedDataPath, expectedStats); + } + public IngestorResult executePlansAndVerifyForCaseConversion(RelationalIngestor ingestor, Datasets datasets, String[] schema, String expectedDataPath, Map expectedStats) throws Exception + { Executor executor = ingestor.init(JdbcConnection.of(h2Sink.connection())); datasets = ingestor.create(datasets); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index bf3a91e7ed7..b2a4ba71cf0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -19,6 +19,7 @@ import org.finos.legend.engine.persistence.components.common.FileFormat; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; @@ -32,6 +33,7 @@ import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; +import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; import org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil; import org.finos.legend.engine.persistence.components.relational.h2.H2Sink; import org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets.H2StagedFilesDatasetProperties; @@ -58,11 +60,11 @@ public class BulkLoadTest extends BaseTest 
private static final String APPEND_TIME = "append_time"; private static final String DIGEST = "digest"; private static final String DIGEST_UDF = "LAKEHOUSE_MD5"; - private static final String LINEAGE = "lake_lineage"; private static final String col_int = "col_int"; private static final String col_string = "col_string"; private static final String col_decimal = "col_decimal"; private static final String col_datetime = "col_datetime"; + private static final String BATCH_ID = "batch_id"; private static Field col1 = Field.builder() .name(col_int) @@ -91,6 +93,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception String filePath = "src/test/resources/data/bulk-load/input/staged_file1.csv"; BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .generateDigest(false) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -116,6 +119,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .appendBatchIdValue("xyz123") .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -125,13 +129,13 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + - "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"append_time\" TIMESTAMP)"; + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" VARCHAR,\"append_time\" TIMESTAMP)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + - "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"append_time\") " + - "SELECT " + - 
"CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP),'2000-01-01 00:00:00' " + - "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file1.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "'xyz123','2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file1.csv'," + + "'col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -140,7 +144,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, APPEND_TIME}; + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID, APPEND_TIME}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); @@ -149,7 +153,11 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table1.csv"; - executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); + executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); + + Map appendMetadata = h2Sink.executeQuery("select * from appendlog_batch_metadata").get(0); + 
verifyAppendMetadata(appendMetadata, filePath); } @Test @@ -160,6 +168,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception BulkLoad bulkLoad = BulkLoad.builder() .generateDigest(false) .auditing(NoAuditing.builder().build()) + .batchIdField("batch_id") .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -183,6 +192,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .appendBatchIdValue("xyz123") .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -192,13 +202,12 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + - "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP)"; + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" VARCHAR)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + - "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\") " + - "SELECT " + - "CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP) " + - "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file2.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "'xyz123' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file2.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; 
Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -207,7 +216,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime}; + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); @@ -215,7 +224,10 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table2.csv"; - executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); + executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); + Map appendMetadata = h2Sink.executeQuery("select * from appendlog_batch_metadata").get(0); + verifyAppendMetadata(appendMetadata, filePath); } @Test @@ -231,6 +243,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception .digestField(DIGEST) .digestUdfName(DIGEST_UDF) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .batchIdField("batch_id") .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -253,6 +266,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) + .appendBatchIdValue("xyz123") .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -263,14 +277,13 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception Map statsSql = 
operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + - "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"digest\" VARCHAR,\"append_time\" TIMESTAMP)"; + "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"digest\" VARCHAR,\"batch_id\" VARCHAR,\"append_time\" TIMESTAMP)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + - "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"digest\", \"append_time\") " + - "SELECT " + - "CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + - "LAKEHOUSE_MD5(ARRAY['col_int','col_string','col_decimal','col_datetime'],ARRAY[\"col_int\",\"col_string\",\"col_decimal\",\"col_datetime\"]),'2000-01-01 00:00:00' " + - "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file3.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"digest\", \"batch_id\", \"append_time\") " + + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + + "LAKEHOUSE_MD5(ARRAY['col_int','col_string','col_decimal','col_datetime'],ARRAY[\"col_int\",\"col_string\",\"col_decimal\",\"col_datetime\"])," + + "'xyz123','2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file3.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -279,7 +292,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception // Verify execution using ingestor PlannerOptions options = 
PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, DIGEST, APPEND_TIME}; + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, DIGEST, BATCH_ID, APPEND_TIME}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); @@ -288,11 +301,14 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table3.csv"; - executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); + executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); + Map appendMetadata = h2Sink.executeQuery("select * from appendlog_batch_metadata").get(0); + verifyAppendMetadata(appendMetadata, filePath); } @Test - public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws Exception + public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Exception { // Register UDF H2DigestUtil.registerMD5Udf(h2Sink, DIGEST_UDF); @@ -300,87 +316,11 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabled() throws E String filePath = "src/test/resources/data/bulk-load/input/staged_file4.csv"; BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .generateDigest(true) .digestField(DIGEST) .digestUdfName(DIGEST_UDF) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) - .lineageField(LINEAGE) - .build(); - - Dataset stagedFilesDataset = StagedFilesDataset.builder() - .stagedFilesDatasetProperties( - H2StagedFilesDatasetProperties.builder() - .fileFormat(FileFormat.CSV) - .addAllFiles(Collections.singletonList(filePath)).build()) - 
.schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) - .build(); - - Dataset mainDataset = DatasetDefinition.builder() - .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") - .schema(SchemaDefinition.builder().build()) - .build(); - - Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); - - // Verify SQLs using generator - RelationalGenerator generator = RelationalGenerator.builder() - .ingestMode(bulkLoad) - .relationalSink(H2Sink.get()) - .collectStatistics(true) - .executionTimestampClock(fixedClock_2000_01_01) - .build(); - - GeneratorResult operations = generator.generateOperations(datasets); - - List preActionsSql = operations.preActionsSql(); - List ingestSql = operations.ingestSql(); - Map statsSql = operations.postIngestStatisticsSql(); - - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + - "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"digest\" VARCHAR,\"append_time\" TIMESTAMP,\"lake_lineage\" VARCHAR)"; - - String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + - "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"digest\", \"append_time\", \"lake_lineage\") " + - "SELECT " + - "CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + - "LAKEHOUSE_MD5(ARRAY['col_int','col_string','col_decimal','col_datetime'],ARRAY[\"col_int\",\"col_string\",\"col_decimal\",\"col_datetime\"])," + - "'2000-01-01 00:00:00'," + - "'src/test/resources/data/bulk-load/input/staged_file4.csv' " + - "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file4.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; - - Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); - Assertions.assertEquals(expectedIngestSql, 
ingestSql.get(0)); - Assertions.assertEquals("SELECT COUNT(*) as \"rowsInserted\" FROM \"TEST_DB\".\"TEST\".\"main\" as my_alias WHERE my_alias.\"append_time\" = '2000-01-01 00:00:00'", statsSql.get(ROWS_INSERTED)); - - - // Verify execution using ingestor - PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, DIGEST, APPEND_TIME, LINEAGE}; - - Map expectedStats = new HashMap<>(); - expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); - expectedStats.put(StatisticName.FILES_LOADED.name(), 1); - expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); - - String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table4.csv"; - - executePlansAndVerifyResults(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); - } - - @Test - public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() throws Exception - { - // Register UDF - H2DigestUtil.registerMD5Udf(h2Sink, DIGEST_UDF); - - String filePath = "src/test/resources/data/bulk-load/input/staged_file5.csv"; - - BulkLoad bulkLoad = BulkLoad.builder() - .generateDigest(true) - .digestField(DIGEST) - .digestUdfName(DIGEST_UDF) - .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) - .lineageField(LINEAGE) .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -403,6 +343,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) + .appendBatchIdValue("xyz123") .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -414,16 +355,14 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS 
\"TEST_DB\".\"TEST\".\"MAIN\"" + - "(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" DECIMAL(5,2),\"COL_DATETIME\" TIMESTAMP,\"DIGEST\" VARCHAR,\"APPEND_TIME\" TIMESTAMP,\"LAKE_LINEAGE\" VARCHAR)"; + "(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" DECIMAL(5,2),\"COL_DATETIME\" TIMESTAMP,\"DIGEST\" VARCHAR,\"BATCH_ID\" VARCHAR,\"APPEND_TIME\" TIMESTAMP)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN\" " + - "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"DIGEST\", \"APPEND_TIME\", \"LAKE_LINEAGE\") " + - "SELECT " + - "CONVERT(\"COL_INT\",INTEGER),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",DECIMAL(5,2)),CONVERT(\"COL_DATETIME\",TIMESTAMP)," + - "LAKEHOUSE_MD5(ARRAY['COL_INT','COL_STRING','COL_DECIMAL','COL_DATETIME'],ARRAY[\"COL_INT\",\"COL_STRING\",\"COL_DECIMAL\",\"COL_DATETIME\"])," + - "'2000-01-01 00:00:00'," + - "'src/test/resources/data/bulk-load/input/staged_file5.csv' " + - "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file5.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"DIGEST\", \"BATCH_ID\", \"APPEND_TIME\") " + + "SELECT CONVERT(\"COL_INT\",INTEGER),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",DECIMAL(5,2)),CONVERT(\"COL_DATETIME\",TIMESTAMP)," + + "LAKEHOUSE_MD5(ARRAY['COL_INT','COL_STRING','COL_DECIMAL','COL_DATETIME'],ARRAY[\"COL_INT\",\"COL_STRING\",\"COL_DECIMAL\",\"COL_DATETIME\"])," + + "'xyz123','2000-01-01 00:00:00' " + + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file4.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -432,16 +371,19 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledLineageEnabledUpperCase() // Verify execution using ingestor 
PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int.toUpperCase(), col_string.toUpperCase(), col_decimal.toUpperCase(), col_datetime.toUpperCase(), DIGEST.toUpperCase(), APPEND_TIME.toUpperCase(), LINEAGE.toUpperCase()}; + String[] schema = new String[]{col_int.toUpperCase(), col_string.toUpperCase(), col_decimal.toUpperCase(), col_datetime.toUpperCase(), DIGEST.toUpperCase(), BATCH_ID.toUpperCase(), APPEND_TIME.toUpperCase()}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); - String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table5.csv"; + String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table4.csv"; - executePlansAndVerifyForCaseConversion(bulkLoad, options, datasets, schema, expectedDataPath, expectedStats, fixedClock_2000_01_01); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER); + executePlansAndVerifyForCaseConversion(ingestor, datasets, schema, expectedDataPath, expectedStats); + Map appendMetadata = h2Sink.executeQuery("select * from APPENDLOG_BATCH_METADATA").get(0); + verifyAppendMetadataForUpperCase(appendMetadata, filePath); } @Test @@ -450,6 +392,7 @@ public void testBulkLoadDigestColumnNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .generateDigest(true) .digestUdfName("LAKEHOUSE_UDF") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) @@ -469,6 +412,7 @@ public void testBulkLoadDigestUDFNotProvided() { BulkLoad bulkLoad = BulkLoad.builder() .generateDigest(true) + .batchIdField("batch_id") .digestField("digest") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -487,6 +431,7 @@ public void 
testBulkLoadStagedFilesDatasetNotProvided() { BulkLoad bulkLoad = BulkLoad.builder() .digestField("digest") + .batchIdField("batch_id") .generateDigest(false) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -504,6 +449,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) + .appendBatchIdValue("xyz123") .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -556,4 +502,39 @@ public void testBulkLoadNotCsvFile() Assertions.assertTrue(e.getMessage().contains("Cannot build H2StagedFilesDatasetProperties, only CSV file loading supported")); } } + + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion) + { + return RelationalIngestor.builder() + .ingestMode(ingestMode) + .relationalSink(H2Sink.get()) + .executionTimestampClock(executionTimestampClock) + .cleanupStagingData(options.cleanupStagingData()) + .collectStatistics(options.collectStatistics()) + .appendBatchIdValue("xyz123") + .enableConcurrentSafety(true) + .caseConversion(caseConversion) + .build(); + } + + private void verifyAppendMetadata(Map appendMetadata, String fileName) + { + Assertions.assertEquals("xyz123", appendMetadata.get("batch_id")); + Assertions.assertEquals("SUCCEEDED", appendMetadata.get("batch_status")); + Assertions.assertEquals("main", appendMetadata.get("table_name")); + Assertions.assertEquals(String.format("{\"files\":[\"%s\"]}", fileName), appendMetadata.get("batch_source_info")); + Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_start_ts_utc").toString()); + Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_end_ts_utc").toString()); + } + + private void verifyAppendMetadataForUpperCase(Map appendMetadata, String fileName) + { + 
Assertions.assertEquals("xyz123", appendMetadata.get("BATCH_ID")); + Assertions.assertEquals("SUCCEEDED", appendMetadata.get("BATCH_STATUS")); + Assertions.assertEquals("MAIN", appendMetadata.get("TABLE_NAME")); + Assertions.assertEquals(String.format("{\"files\":[\"%s\"]}", fileName), appendMetadata.get("BATCH_SOURCE_INFO")); + Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("BATCH_START_TS_UTC").toString()); + Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("BATCH_END_TS_UTC").toString()); + } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv index 69cb6d91ae4..b68e9aa646b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,xyz123,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,xyz123,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv index dd2941bedb8..c807b1c4764 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,xyz123 +2,Bella,99.99,2022-01-12 00:00:00.0,xyz123 +49,Sandy,123.45,2022-01-13 00:00:00.0,xyz123 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv index fb82c08158b..8fc9ed0670f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,6366D6AFD9E8B991393E719A5A4E6D35,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,C556B5DC2B9F3A66000202DF9D98EC05,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 
00:00:00.0,051D68CF86951CDE0DF875915940AEC6,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,6366D6AFD9E8B991393E719A5A4E6D35,xyz123,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,C556B5DC2B9F3A66000202DF9D98EC05,xyz123,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,051D68CF86951CDE0DF875915940AEC6,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv index 395da23b52f..074bc2e251d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,6366D6AFD9E8B991393E719A5A4E6D35,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv -2,Bella,99.99,2022-01-12 00:00:00.0,C556B5DC2B9F3A66000202DF9D98EC05,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv -49,Sandy,123.45,2022-01-13 00:00:00.0,051D68CF86951CDE0DF875915940AEC6,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file4.csv \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,4B39799C7A1FB5EFC4BC328966A159E0,xyz123,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,58467B440BCED7607369DC8A260B0607,xyz123,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 
00:00:00.0,29B8C8A6CD28B069290372E6B54B6C72,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv deleted file mode 100644 index 7d90d71c952..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv +++ /dev/null @@ -1,3 +0,0 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,4B39799C7A1FB5EFC4BC328966A159E0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv -2,Bella,99.99,2022-01-12 00:00:00.0,58467B440BCED7607369DC8A260B0607,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv -49,Sandy,123.45,2022-01-13 00:00:00.0,29B8C8A6CD28B069290372E6B54B6C72,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv deleted file mode 100644 index dd2941bedb8..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv +++ /dev/null @@ -1,3 +0,0 @@ -1,Andy,5.20,2022-01-11 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0 \ No 
newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index ae5ee8aa6af..2b555bb0122 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -76,6 +76,7 @@ public class BulkLoadTest public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .digestField("digest") .generateDigest(false) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) @@ -100,24 +101,30 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .appendBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); List preActionsSql = operations.preActionsSql(); List ingestSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER,\"append_time\" DATETIME)"; + 
String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER,\"batch_id\" VARCHAR,\"append_time\" DATETIME)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + - "(\"col_int\", \"col_integer\", \"append_time\") " + + "(\"col_int\", \"col_integer\", \"batch_id\", \"append_time\") " + "FROM " + - "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\",'2000-01-01 00:00:00' " + + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\",'batch123','2000-01-01 00:00:00' " + "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage)" + " on_error = 'ABORT_STATEMENT'"; + String expectedMetadataIngestSql = "INSERT INTO appendlog_batch_metadata (\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + + "(SELECT 'batch123','my_name','2000-01-01 00:00:00',SYSDATE(),'{APPEND_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); Assertions.assertEquals("SELECT 0 as \"rowsDeleted\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"rowsTerminated\"", statsSql.get(ROWS_TERMINATED)); @@ -129,6 +136,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .digestField("digest") .generateDigest(false) .auditing(NoAuditing.builder().build()) @@ -153,6 +161,7 @@ public void 
testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .ingestMode(bulkLoad) .relationalSink(SnowflakeSink.get()) .collectStatistics(true) + .appendBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -161,11 +170,11 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() List ingestSql = operations.ingestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT,\"col_variant\" VARIANT)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT,\"col_variant\" VARIANT,\"batch_id\" VARCHAR)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + - "(\"col_bigint\", \"col_variant\") " + + "(\"col_bigint\", \"col_variant\", \"batch_id\") " + "FROM " + - "(SELECT t.$4 as \"col_bigint\",TO_VARIANT(PARSE_JSON(t.$5)) as \"col_variant\" " + + "(SELECT t.$4 as \"col_bigint\",TO_VARIANT(PARSE_JSON(t.$5)) as \"col_variant\",'batch123' " + "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as t) " + "on_error = 'ABORT_STATEMENT'"; @@ -181,11 +190,11 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .digestField("digest") .generateDigest(true) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .digestUdfName("LAKEHOUSE_MD5") - .lineageField("lake_lineage") .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -208,6 +217,7 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) + .appendBatchIdValue("batch123") .build(); 
GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -217,13 +227,13 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY," + - "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"APPEND_TIME\" DATETIME,\"LAKE_LINEAGE\" VARCHAR)"; + "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" VARCHAR,\"APPEND_TIME\" DATETIME)"; String expectedIngestSql = "COPY INTO \"MY_DB\".\"MY_NAME\" " + - "(\"COL_INT\", \"COL_INTEGER\", \"DIGEST\", \"APPEND_TIME\", \"LAKE_LINEAGE\") " + + "(\"COL_INT\", \"COL_INTEGER\", \"DIGEST\", \"BATCH_ID\", \"APPEND_TIME\") " + "FROM " + "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('COL_INT',legend_persistence_stage.$1,'COL_INTEGER',legend_persistence_stage.$2))," + - "'2000-01-01 00:00:00','/path/xyz/file1.csv,/path/xyz/file2.csv' " + + "'batch123','2000-01-01 00:00:00' " + "FROM my_location (FILE_FORMAT => 'my_file_format', " + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; @@ -243,6 +253,7 @@ public void testBulkLoadDigestColumnNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .generateDigest(true) .digestUdfName("LAKEHOUSE_UDF") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) @@ -261,6 +272,7 @@ public void testBulkLoadDigestUDFNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .generateDigest(true) .digestField("digest") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) @@ -279,6 +291,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") 
.digestField("digest") .generateDigest(false) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) @@ -299,6 +312,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .appendBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); @@ -314,10 +328,10 @@ public void testBulkLoadStagedFilesDatasetNotProvided() public void testBulkLoadWithDigestAndLineage() { BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField("batch_id") .digestField("digest") .generateDigest(true) .digestUdfName("LAKEHOUSE_UDF") - .lineageField("lake_lineage") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -340,6 +354,7 @@ public void testBulkLoadWithDigestAndLineage() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .appendBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -348,14 +363,14 @@ public void testBulkLoadWithDigestAndLineage() List ingestSql = operations.ingestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER,\"digest\" VARCHAR,\"append_time\" DATETIME,\"lake_lineage\" VARCHAR)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER,\"digest\" VARCHAR,\"batch_id\" VARCHAR,\"append_time\" DATETIME)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + - "(\"col_int\", \"col_integer\", \"digest\", \"append_time\", \"lake_lineage\") " + + "(\"col_int\", \"col_integer\", \"digest\", \"batch_id\", \"append_time\") " + "FROM 
" + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\"," + "LAKEHOUSE_UDF(OBJECT_CONSTRUCT('col_int',legend_persistence_stage.$1,'col_integer',legend_persistence_stage.$2))," + - "'2000-01-01 00:00:00','/path/xyz/file1.csv,/path/xyz/file2.csv' " + + "'batch123','2000-01-01 00:00:00' " + "FROM my_location (FILE_FORMAT => 'my_file_format', " + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java new file mode 100644 index 00000000000..df4c1a12389 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java @@ -0,0 +1,40 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.util; + +import org.finos.legend.engine.persistence.components.relational.RelationalSink; +import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; + +public class AppendLogDatasetUtilsSnowflakeTest extends AppendLogDatasetUtilsTest +{ + + public String getExpectedSqlForAppendMetadata() + { + return "INSERT INTO appendlog_batch_metadata " + + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + + "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + } + + public String getExpectedSqlForAppendMetadataUpperCase() + { + return "INSERT INTO APPENDLOG_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + + "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + } + + public RelationalSink getRelationalSink() + { + return SnowflakeSink.get(); + } +} From c6a2c593507dbfcc727a7310a9b76ce9cf9f0385 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Wed, 13 Sep 2023 11:45:36 +0800 Subject: [PATCH 36/57] Refactor Digest Generation Strategy --- .../ingestmode/BulkLoadAbstract.java | 20 +------ .../DeriveMainDatasetSchemaFromStaging.java | 6 +-- .../ingestmode/IngestModeCaseConverter.java | 29 ++++++++-- .../ingestmode/IngestModeVisitors.java | 41 ++++++++++++-- .../UDFBasedDigestGenStrategyAbstract.java | 4 ++ .../components/planner/BulkLoadPlanner.java | 54 ++++++++++++++----- .../ingestmode/bulkload/BulkLoadTest.java | 28 ++++------ .../components/ingestmode/BulkLoadTest.java | 31 +++++------ 8 files changed, 138 insertions(+), 75 deletions(-) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java index 01b34c0c82d..800ca9c7a6c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadAbstract.java @@ -15,8 +15,8 @@ package org.finos.legend.engine.persistence.components.ingestmode; import org.finos.legend.engine.persistence.components.ingestmode.audit.Auditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategy; import org.immutables.value.Value; -import java.util.Optional; @Value.Immutable @Value.Style( @@ -30,11 +30,7 @@ public interface BulkLoadAbstract extends IngestMode { String batchIdField(); - boolean generateDigest(); - - Optional digestUdfName(); - - Optional digestField(); + DigestGenStrategy digestGenStrategy(); Auditing auditing(); @@ -43,16 +39,4 @@ default T accept(IngestModeVisitor visitor) { return visitor.visitBulkLoad(this); } - - @Value.Check - default void validate() - { - if (generateDigest()) - { - if (!digestField().isPresent() || !digestUdfName().isPresent()) - { - throw new IllegalArgumentException("For digest generation, digestField & digestUdfName are mandatory"); - } - } - } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index e0f8dd81908..cf333ca3b41 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -142,9 +142,10 @@ public Dataset visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) @Override public Dataset visitBulkLoad(BulkLoadAbstract bulkLoad) { - if (bulkLoad.generateDigest()) + Optional digestField = bulkLoad.digestGenStrategy().accept(IngestModeVisitors.EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY); + if (digestField.isPresent()) { - addDigestField(mainSchemaFields, bulkLoad.digestField().get()); + addDigestField(mainSchemaFields, digestField.get()); } Field batchIdField = Field.builder() .name(bulkLoad.batchIdField()) @@ -211,7 +212,6 @@ public Boolean visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) } } - public static class EnrichSchemaWithMergeStrategy implements MergeStrategyVisitor { private List mainSchemaFields; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java index c024e09769a..fcdca2edae4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeCaseConverter.java @@ -25,6 +25,11 @@ import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.MaxVersionStrategy; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.NoVersioningStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategyVisitor; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategy; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitor; import org.finos.legend.engine.persistence.components.ingestmode.merge.NoDeletesMergeStrategyAbstract; @@ -161,9 +166,7 @@ public IngestMode visitBulkLoad(BulkLoadAbstract bulkLoad) { return BulkLoad.builder() .batchIdField(applyCase(bulkLoad.batchIdField())) - 
.digestField(applyCase(bulkLoad.digestField())) - .digestUdfName(bulkLoad.digestUdfName()) - .generateDigest(bulkLoad.generateDigest()) + .digestGenStrategy(bulkLoad.digestGenStrategy().accept(new DigestGenStrategyCaseConverter())) .auditing(bulkLoad.auditing().accept(new AuditingCaseConverter())) .build(); } @@ -217,6 +220,26 @@ public MergeStrategy visitDeleteIndicatorMergeStrategy(DeleteIndicatorMergeStrat } } + private class DigestGenStrategyCaseConverter implements DigestGenStrategyVisitor + { + + @Override + public DigestGenStrategy visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStrategy) + { + return noDigestGenStrategy; + } + + @Override + public DigestGenStrategy visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) + { + return UDFBasedDigestGenStrategy.builder() + .digestUdfName(udfBasedDigestGenStrategy.digestUdfName()) + .digestField(applyCase(udfBasedDigestGenStrategy.digestField())) + .build(); + } + } + + private class TransactionMilestoningCaseConverter implements TransactionMilestoningVisitor { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java index e6440efc860..3cb54406d79 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/IngestModeVisitors.java @@ -19,6 +19,9 @@ import org.finos.legend.engine.persistence.components.ingestmode.deduplication.DeduplicationStrategyVisitor; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FailOnDuplicatesAbstract; import org.finos.legend.engine.persistence.components.ingestmode.deduplication.FilterDuplicatesAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategyVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategyAbstract; import org.finos.legend.engine.persistence.components.ingestmode.merge.MergeStrategyVisitors; import java.util.Collections; @@ -80,7 +83,7 @@ public Boolean visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) @Override public Boolean visitBulkLoad(BulkLoadAbstract bulkLoad) { - return bulkLoad.generateDigest(); + return bulkLoad.digestGenStrategy().accept(DIGEST_GEN_STRATEGY_DIGEST_REQUIRED); } }; @@ -131,7 +134,7 @@ public Optional visitBitemporalDelta(BitemporalDeltaAbstract bitemporalD @Override public Optional visitBulkLoad(BulkLoadAbstract bulkLoad) { - return bulkLoad.digestField(); + return bulkLoad.digestGenStrategy().accept(EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY); } }; @@ -198,7 +201,8 @@ public Set visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) public Set visitBulkLoad(BulkLoadAbstract bulkLoad) { Set metaFields = new HashSet<>(); - bulkLoad.digestField().ifPresent(metaFields::add); + Optional digestField = bulkLoad.digestGenStrategy().accept(EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY); + digestField.ifPresent(metaFields::add); return metaFields; } }; @@ -376,4 +380,35 @@ 
public Boolean visitFailOnDuplicates(FailOnDuplicatesAbstract failOnDuplicates) return false; } }; + + private static final DigestGenStrategyVisitor DIGEST_GEN_STRATEGY_DIGEST_REQUIRED = new DigestGenStrategyVisitor() + { + @Override + public Boolean visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStrategy) + { + return false; + } + + @Override + public Boolean visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) + { + return true; + } + }; + + public static final DigestGenStrategyVisitor> EXTRACT_DIGEST_FIELD_FROM_DIGEST_GEN_STRATEGY = new DigestGenStrategyVisitor>() + { + @Override + public Optional visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStrategy) + { + return Optional.empty(); + } + + @Override + public Optional visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) + { + return Optional.of(udfBasedDigestGenStrategy.digestField()); + } + }; + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java index 2fdee7a6559..d32de539de3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/digest/UDFBasedDigestGenStrategyAbstract.java 
@@ -26,6 +26,10 @@ ) public interface UDFBasedDigestGenStrategyAbstract extends DigestGenStrategy { + String digestUdfName(); + + String digestField(); + @Override default T accept(DigestGenStrategyVisitor visitor) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index cb14f17a852..ca37be1bad8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -21,6 +21,9 @@ import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.AuditingVisitors; +import org.finos.legend.engine.persistence.components.ingestmode.digest.DigestGenStrategyVisitor; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategyAbstract; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategyAbstract; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; @@ -84,18 +87,7 @@ public LogicalPlan 
buildLogicalPlanForIngest(Resources resources, Set fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); // Digest Generation - if (ingestMode().generateDigest()) - { - Value digestValue = DigestUdf - .builder() - .udfName(ingestMode().digestUdfName().orElseThrow(IllegalStateException::new)) - .addAllFieldNames(stagingDataset().schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) - .addAllValues(fieldsToSelect) - .build(); - String digestField = ingestMode().digestField().orElseThrow(IllegalStateException::new); - fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(digestField).build()); - fieldsToSelect.add(digestValue); - } + ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert)); // Add batch_id field fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); @@ -186,4 +178,42 @@ public static String jsonifyBatchSourceInfo(List files) } } + + static class DigestGeneration implements DigestGenStrategyVisitor + { + private List fieldsToSelect; + private List fieldsToInsert; + private Dataset stagingDataset; + private Dataset mainDataset; + + public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert) + { + this.mainDataset = mainDataset; + this.stagingDataset = stagingDataset; + this.fieldsToSelect = fieldsToSelect; + this.fieldsToInsert = fieldsToInsert; + } + + @Override + public Void visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStrategy) + { + return null; + } + + @Override + public Void visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) + { + Value digestValue = DigestUdf + .builder() + .udfName(udfBasedDigestGenStrategy.digestUdfName()) + 
.addAllFieldNames(stagingDataset.schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) + .addAllValues(fieldsToSelect) + .build(); + String digestField = udfBasedDigestGenStrategy.digestField(); + fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(digestField).build()); + fieldsToSelect.add(digestValue); + return null; + } + } + } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index b2a4ba71cf0..bc3350fbb8c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -22,6 +22,8 @@ import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -94,7 +96,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .generateDigest(false) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -166,7 +168,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception String filePath = "src/test/resources/data/bulk-load/input/staged_file2.csv"; BulkLoad bulkLoad = BulkLoad.builder() - .generateDigest(false) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(NoAuditing.builder().build()) .batchIdField("batch_id") .build(); @@ -239,9 +241,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception String filePath = "src/test/resources/data/bulk-load/input/staged_file3.csv"; BulkLoad bulkLoad = BulkLoad.builder() - .generateDigest(true) - .digestField(DIGEST) - .digestUdfName(DIGEST_UDF) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(DIGEST_UDF).digestField(DIGEST).build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .batchIdField("batch_id") .build(); @@ -317,9 +317,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .generateDigest(true) - .digestField(DIGEST) - .digestUdfName(DIGEST_UDF) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(DIGEST_UDF).digestField(DIGEST).build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -392,16 +390,14 @@ public void testBulkLoadDigestColumnNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() - .batchIdField("batch_id") - .generateDigest(true) - 
.digestUdfName("LAKEHOUSE_UDF") + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(DIGEST_UDF).build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); Assertions.fail("Exception was not thrown"); } catch (Exception e) { - Assertions.assertTrue(e.getMessage().contains("For digest generation, digestField & digestUdfName are mandatory")); + Assertions.assertTrue(e.getMessage().contains("Cannot build UDFBasedDigestGenStrategy, some of required attributes are not set [digestField]")); } } @@ -411,16 +407,15 @@ public void testBulkLoadDigestUDFNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() - .generateDigest(true) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField(DIGEST).build()) .batchIdField("batch_id") - .digestField("digest") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); Assertions.fail("Exception was not thrown"); } catch (Exception e) { - Assertions.assertTrue(e.getMessage().contains("For digest generation, digestField & digestUdfName are mandatory")); + Assertions.assertTrue(e.getMessage().contains("Cannot build UDFBasedDigestGenStrategy, some of required attributes are not set [digestUdfName]")); } } @@ -430,9 +425,8 @@ public void testBulkLoadStagedFilesDatasetNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() - .digestField("digest") .batchIdField("batch_id") - .generateDigest(false) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 2b555bb0122..cc15332194d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -18,6 +18,8 @@ import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; @@ -77,8 +79,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .digestField("digest") - .generateDigest(false) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -137,8 +138,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .digestField("digest") - .generateDigest(false) + 
.digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(NoAuditing.builder().build()) .build(); @@ -191,10 +191,8 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .digestField("digest") - .generateDigest(true) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField("digest").digestUdfName("LAKEHOUSE_MD5").build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) - .digestUdfName("LAKEHOUSE_MD5") .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -253,16 +251,15 @@ public void testBulkLoadDigestColumnNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName("LAKEHOUSE_MD5").build()) .batchIdField("batch_id") - .generateDigest(true) - .digestUdfName("LAKEHOUSE_UDF") .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); Assertions.fail("Exception was not thrown"); } catch (Exception e) { - Assertions.assertTrue(e.getMessage().contains("For digest generation, digestField & digestUdfName are mandatory")); + Assertions.assertTrue(e.getMessage().contains("Cannot build UDFBasedDigestGenStrategy, some of required attributes are not set [digestField]")); } } @@ -273,15 +270,14 @@ public void testBulkLoadDigestUDFNotProvided() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .generateDigest(true) - .digestField("digest") + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField("digest").build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); Assertions.fail("Exception was not thrown"); } catch (Exception e) { - Assertions.assertTrue(e.getMessage().contains("For digest generation, digestField & digestUdfName are mandatory")); + Assertions.assertTrue(e.getMessage().contains("Cannot build UDFBasedDigestGenStrategy, some of required attributes are not set 
[digestUdfName]")); } } @@ -292,8 +288,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .digestField("digest") - .generateDigest(false) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -325,13 +320,11 @@ public void testBulkLoadStagedFilesDatasetNotProvided() } @Test - public void testBulkLoadWithDigestAndLineage() + public void testBulkLoadWithDigest() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") - .digestField("digest") - .generateDigest(true) - .digestUdfName("LAKEHOUSE_UDF") + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField("digest").digestUdfName("LAKEHOUSE_UDF").build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); From 402f683761959a4a7f95c7f8c1349124fb52d672 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 13 Sep 2023 17:28:58 +0800 Subject: [PATCH 37/57] Implement bulk load for big query --- .../common/AvroFileFormatAbstract.java | 5 + .../common/CsvFileFormatAbstract.java | 21 ++++ .../components/common/FileFormat.java | 1 + .../common/JsonFileFormatAbstract.java | 11 ++ .../common/ParquetFileFormatAbstract.java | 5 + .../pom.xml | 26 +++++ .../relational/bigquery/BigQuerySink.java | 15 +++ ...yStagedFilesDatasetPropertiesAbstract.java | 34 ++++++ .../bigquery/sql/visitor/CopyVisitor.java | 41 +++++++ .../StagedFilesDatasetReferenceVisitor.java | 66 +++++++++++ .../visitor/StagedFilesDatasetVisitor.java | 40 +++++++ .../visitor/StagedFilesFieldValueVisitor.java | 36 ++++++ .../visitor/StagedFilesSelectionVisitor.java | 42 +++++++ .../schemaops/statements/CopyStatement.java | 77 +++++++++++++ .../statements/SelectFromFileStatement.java | 106 ++++++++++++++++++ .../schemaops/values/StagedFilesField.java | 69 ++++++++++++ .../relational/sqldom/common/Clause.java | 5 +- 17 files changed, 599 insertions(+), 
1 deletion(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesFieldValueVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/values/StagedFilesField.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java index 7ba38d2c0b5..d81f1212b48 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java @@ -26,4 +26,9 @@ ) public interface AvroFileFormatAbstract extends FileFormat { + @Override + default String getFormatName() + { + return "AVRO"; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java index fb7aae5d5ca..2fd1de0fcea 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java @@ -16,6 +16,8 @@ import org.immutables.value.Value; +import java.util.Optional; + @Value.Immutable @Value.Style( typeAbstract = "*Abstract", @@ -26,4 +28,23 @@ ) public interface CsvFileFormatAbstract extends FileFormat { + Optional fieldDelimiter(); + + Optional encoding(); + + Optional nullMarker(); + + Optional quote(); + + Optional skipLeadingRows(); + + Optional maxBadRecords(); + + Optional compression(); + + @Override + default String getFormatName() + { + return "CSV"; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java index 90085b358c4..180b38eae1b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java @@ -16,4 +16,5 @@ public interface FileFormat { + String getFormatName(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java index 40d78c1730d..b10d531f308 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java @@ -16,6 +16,8 @@ import org.immutables.value.Value; +import java.util.Optional; + @Value.Immutable @Value.Style( typeAbstract = "*Abstract", @@ -26,4 +28,13 @@ ) public interface JsonFileFormatAbstract extends FileFormat { + Optional maxBadRecords(); + + Optional compression(); + + @Override + default String getFormatName() + { + return "JSON"; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java index d03d72a2306..4d0dbac3579 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java @@ -26,4 +26,9 @@ ) public interface ParquetFileFormatAbstract extends FileFormat { + @Override + default String getFormatName() + { + return "PARQUET"; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/pom.xml b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/pom.xml index 3c9d6c9f7f4..c18fc78832e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/pom.xml +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/pom.xml @@ -56,6 +56,12 @@ legend-engine-xt-persistence-component-relational-ansi + + + org.immutables + value + + com.google.cloud @@ -66,17 +72,37 @@ commons-logging commons-logging + + com.google.errorprone + error_prone_annotations + + + com.google.code.findbugs + jsr305 + com.google.cloud google-cloud-core provided + + + com.google.code.findbugs + jsr305 + + com.google.auth google-auth-library-oauth2-http provided + + + com.google.code.findbugs + jsr305 + + org.slf4j diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index ba31be6adad..04401ba1973 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -21,13 +21,18 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.PartitionKey; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.logicalplan.operations.Alter; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import org.finos.legend.engine.persistence.components.logicalplan.operations.Truncate; import 
org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.DatetimeValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.RelationalSink; @@ -45,12 +50,17 @@ import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.BatchEndTimestampVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.BatchStartTimestampVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.ClusterKeyVisitor; +import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.CopyVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.DatetimeValueVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.DeleteVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.FieldVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.PartitionKeyVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.SQLCreateVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.SchemaDefinitionVisitor; +import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.StagedFilesDatasetReferenceVisitor; +import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.StagedFilesDatasetVisitor; +import 
org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.StagedFilesFieldValueVisitor; +import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.StagedFilesSelectionVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.TruncateVisitor; import org.finos.legend.engine.persistence.components.relational.sql.TabularData; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; @@ -97,6 +107,11 @@ public class BigQuerySink extends AnsiSqlSink logicalPlanVisitorByClass.put(DatetimeValue.class, new DatetimeValueVisitor()); logicalPlanVisitorByClass.put(BatchEndTimestamp.class, new BatchEndTimestampVisitor()); logicalPlanVisitorByClass.put(BatchStartTimestamp.class, new BatchStartTimestampVisitor()); + logicalPlanVisitorByClass.put(Copy.class, new CopyVisitor()); + logicalPlanVisitorByClass.put(StagedFilesFieldValue.class, new StagedFilesFieldValueVisitor()); + logicalPlanVisitorByClass.put(StagedFilesDataset.class, new StagedFilesDatasetVisitor()); + logicalPlanVisitorByClass.put(StagedFilesSelection.class, new StagedFilesSelectionVisitor()); + logicalPlanVisitorByClass.put(StagedFilesDatasetReference.class, new StagedFilesDatasetReferenceVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); Map> implicitDataTypeMapping = new HashMap<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java new file 
mode 100644 index 00000000000..70e5a26c1ba --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java @@ -0,0 +1,34 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +package org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets; + +import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; +import org.immutables.value.Value; + + +@Value.Immutable +@Value.Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface BigQueryStagedFilesDatasetPropertiesAbstract extends StagedFilesDatasetProperties +{ + FileFormat fileFormat(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java new file mode 100644 index 00000000000..502c92e22f0 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java @@ -0,0 +1,41 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements.CopyStatement; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class CopyVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, Copy current, VisitorContext context) + { + CopyStatement copyStatement = new CopyStatement(); + prev.push(copyStatement); + + List logicalPlanNodes = new ArrayList<>(); + logicalPlanNodes.add(current.sourceDataset()); + logicalPlanNodes.add(current.targetDataset()); + return new VisitorResult(copyStatement, logicalPlanNodes); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java new file mode 100644 index 00000000000..5cc0a42174a --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -0,0 +1,66 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.common.CsvFileFormat; +import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.JsonFileFormat; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.HashMap; +import java.util.Map; + + +public class StagedFilesDatasetReferenceVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference current, VisitorContext context) + { + if (!(current.properties() 
instanceof BigQueryStagedFilesDatasetProperties)) + { + throw new IllegalStateException("Only BigQueryStagedFilesDatasetProperties are supported for BigQuery Sink"); + } + BigQueryStagedFilesDatasetProperties datasetProperties = (BigQueryStagedFilesDatasetProperties) current.properties(); + + Map loadOptionsMap = new HashMap<>(); + FileFormat fileFormat = datasetProperties.fileFormat(); + loadOptionsMap.put("uris", "[" + String.join(",", datasetProperties.files()) + "]"); + loadOptionsMap.put("format", fileFormat.getFormatName()); + if (fileFormat instanceof CsvFileFormat) + { + CsvFileFormat csvFileFormat = (CsvFileFormat) fileFormat; + csvFileFormat.fieldDelimiter().ifPresent(property -> loadOptionsMap.put("field_delimiter", property)); + csvFileFormat.encoding().ifPresent(property -> loadOptionsMap.put("encoding", property)); + csvFileFormat.nullMarker().ifPresent(property -> loadOptionsMap.put("null_marker", property)); + csvFileFormat.quote().ifPresent(property -> loadOptionsMap.put("quote", property)); + csvFileFormat.skipLeadingRows().ifPresent(property -> loadOptionsMap.put("skip_leading_rows", property.toString())); + csvFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property.toString())); + csvFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); + } + else if (fileFormat instanceof JsonFileFormat) + { + JsonFileFormat jsonFileFormat = (JsonFileFormat) fileFormat; + jsonFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property.toString())); + jsonFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); + } + prev.push(loadOptionsMap); + + return new VisitorResult(null); + } +} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java new file mode 100644 index 00000000000..a709ad514fe --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java @@ -0,0 +1,40 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; +import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; +import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; + +import java.util.List; + +public class StagedFilesDatasetVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDataset current, VisitorContext context) + { + List allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); + StagedFilesSelection selection = StagedFilesSelection.builder() + .source(current) + .addAllFields(allColumns) + .alias(current.datasetReference().alias()) + .build(); + return new StagedFilesSelectionVisitor().visit(prev, selection, context); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesFieldValueVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesFieldValueVisitor.java new file mode 100644 index 00000000000..e2841175d99 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesFieldValueVisitor.java @@ -0,0 +1,36 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.bigquery.sql.BigQueryDataTypeMapping; +import org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.values.StagedFilesField; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class StagedFilesFieldValueVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesFieldValue current, VisitorContext context) + { + DataType dataType = new BigQueryDataTypeMapping().getDataType(current.fieldType()); + StagedFilesField field = new StagedFilesField(context.quoteIdentifier(), 
current.fieldName(), dataType); + prev.push(field); + return new VisitorResult(null); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java new file mode 100644 index 00000000000..0e57ce3c7f5 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java @@ -0,0 +1,42 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements.SelectFromFileStatement; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.ArrayList; +import java.util.List; + +public class StagedFilesSelectionVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, StagedFilesSelection current, VisitorContext context) + { + SelectFromFileStatement selectFromFileStatement = new SelectFromFileStatement(); + prev.push(selectFromFileStatement); + + List logicalPlanNodeList = new ArrayList<>(); + logicalPlanNodeList.add(current.source().datasetReference()); + logicalPlanNodeList.addAll(current.fields()); + + return new VisitorResult(selectFromFileStatement, logicalPlanNodeList); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java new file mode 100644 index 00000000000..f8e9ae4aa0e --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java @@ -0,0 +1,77 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DMLStatement; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause.LOAD_DATA; +import static org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause.OVERWRITE; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class CopyStatement implements DMLStatement +{ + private Table table; + private SelectFromFileStatement selectFromFileStatement; + + public CopyStatement() + { + } + + /* + Copy GENERIC PLAN for Big Query: + LOAD DATA OVERWRITE table_name + (COLUMN_LIST) + FROM FILES (LOAD_OPTIONS) + */ + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + 
validate(); + builder.append(LOAD_DATA.get()); + builder.append(WHITE_SPACE); + builder.append(OVERWRITE.get()); + builder.append(WHITE_SPACE); + + selectFromFileStatement.genSql(builder); + } + + @Override + public void push(Object node) + { + if (node instanceof Table) + { + table = (Table) node; + } + else if (node instanceof SelectFromFileStatement) + { + selectFromFileStatement = (SelectFromFileStatement) node; + } + } + + void validate() throws SqlDomException + { + if (selectFromFileStatement == null) + { + throw new SqlDomException("selectFromFileStatement is mandatory for Copy Table Command"); + } + + if (table == null) + { + throw new SqlDomException("table is mandatory for Copy Table Command"); + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java new file mode 100644 index 00000000000..c80a1c6e3c3 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java @@ -0,0 +1,106 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.select.SelectExpression; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; +import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.ASSIGNMENT_OPERATOR; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.COMMA; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class SelectFromFileStatement extends SelectExpression +{ + private final List columns; + private Map loadOptions; + + public SelectFromFileStatement() + { + columns = new ArrayList<>(); + } + + /* + Select from file GENERIC PLAN for 
Big Query: + (COLUMN_LIST) + FROM FILES (LOAD_OPTIONS) + */ + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + validate(); + + builder.append(OPEN_PARENTHESIS); + SqlGen.genSqlList(builder, columns, WHITE_SPACE, COMMA); + builder.append(CLOSING_PARENTHESIS); + + builder.append(WHITE_SPACE); + builder.append(Clause.FROM_FILES.get()); + builder.append(WHITE_SPACE); + + if (loadOptions != null && loadOptions.size() > 0) + { + builder.append(OPEN_PARENTHESIS); + + int counter = 0; + for (String option : loadOptions.keySet()) + { + counter++; + builder.append(option); + builder.append(ASSIGNMENT_OPERATOR); + builder.append(SqlGenUtils.singleQuote(loadOptions.get(option))); + if (counter < columns.size()) + { + builder.append(COMMA + WHITE_SPACE); + } + } + builder.append(CLOSING_PARENTHESIS); + } + } + + @Override + public void push(Object node) + { + if (node instanceof Value) + { + columns.add((Value) node); + } + if (node instanceof Map) + { + loadOptions = (Map) node; + } + } + + void validate() throws SqlDomException + { + if (!loadOptions.containsKey("uris")) + { + throw new SqlDomException("uris are mandatory for loading from files"); + } + if (!loadOptions.containsKey("format")) + { + throw new SqlDomException("format is mandatory for loading from files"); + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/values/StagedFilesField.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/values/StagedFilesField.java new file mode 100644 index 00000000000..0df2a15b8e8 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/values/StagedFilesField.java @@ -0,0 +1,69 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.values; + +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.schema.DataType; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; +import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; + +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; + +public class StagedFilesField extends Value +{ + private String columnName; + + private DataType dataType; + + public StagedFilesField(String quoteIdentifier, String columnName, DataType datatype) + { + super(quoteIdentifier); + this.columnName = columnName; + this.dataType = datatype; + } + + @Override + public void genSql(StringBuilder builder) throws SqlDomException + { + genSqlWithoutAlias(builder); + super.genSql(builder); + } + + @Override + public void genSqlWithoutAlias(StringBuilder builder) throws SqlDomException + { + 
validate(); + builder.append(SqlGenUtils.getQuotedField(columnName, getQuoteIdentifier())); + builder.append(WHITE_SPACE); + dataType.genSql(builder); + } + + @Override + public void push(Object node) + { + } + + void validate() throws SqlDomException + { + if (columnName == null) + { + throw new SqlDomException("columnName is empty"); + } + if (dataType == null) + { + throw new SqlDomException("dataType is empty"); + } + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java index 1086d1e2c06..46c2e5c3574 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java @@ -58,7 +58,10 @@ public enum Clause NOT_ENFORCED("NOT ENFORCED"), DATA_TYPE("DATA TYPE"), CONVERT("CONVERT"), - ARRAY("ARRAY"); + ARRAY("ARRAY"), + LOAD_DATA("LOAD DATA"), + OVERWRITE("OVERWRITE"), + FROM_FILES("FROM FILES"); private final String clause; From 68af4a85c6aa11e54188cc6283901821494c0486 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Thu, 14 Sep 2023 11:39:20 +0800 Subject: [PATCH 38/57] Addressed Code Review Comments --- .../persistence/components/planner/BulkLoadPlanner.java | 8 +++++--- .../engine/persistence/components/planner/Planner.java | 2 +- 
.../components/util/AppendLogDatasetUtilsAnsiTest.java | 2 +- .../components/util/AppendLogDatasetUtilsTest.java | 2 +- .../util/AppendLogDatasetUtilsBigQueryTest.java | 2 +- .../util/AppendLogDatasetUtilsSnowflakeTest.java | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index ca37be1bad8..70fd5fe1635 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -35,6 +35,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; @@ -125,7 +126,7 @@ public LogicalPlan buildLogicalPlanForPostActions(Resources resources) public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) 
{ AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); - String batchSourceInfo = jsonifyBatchSourceInfo(stagedFilesDataset.stagedFilesDatasetProperties().files()); + String batchSourceInfo = jsonifyBatchSourceInfo(stagedFilesDataset.stagedFilesDatasetProperties()); StringValue appendDatasetName = StringValue.of(mainDataset().datasetReference().name()); StringValue batchIdValue = StringValue.of(options().appendBatchIdValue().orElseThrow(IllegalStateException::new)); @@ -163,9 +164,10 @@ private Selection getRowsBasedOnAppendTimestamp(Dataset dataset, String field, S return Selection.builder().source(dataset.datasetReference()).condition(condition).addFields(countFunction).build(); } - public static String jsonifyBatchSourceInfo(List files) + private String jsonifyBatchSourceInfo(StagedFilesDatasetProperties stagedFilesDatasetProperties) { - Map batchSourceMap = new HashMap(); + List files = stagedFilesDatasetProperties.files(); + Map batchSourceMap = new HashMap(); batchSourceMap.put("files", files); ObjectMapper objectMapper = new ObjectMapper(); try diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index d8ba78d7531..00dc131ffc9 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ 
-26,12 +26,12 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestampAbstract; +import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.LockInfoUtils; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java index 06cb7d2b807..6236a508466 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java @@ -30,7 +30,7 @@ public String getExpectedSqlForAppendMetadata() public String getExpectedSqlForAppendMetadataUpperCase() { return "INSERT INTO APPENDLOG_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java index b16c2eeb22c..50fe207b5bc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java @@ -66,7 +66,7 @@ public void testInsertAppendMetadataInUpperCase() { AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); StringValue batchIdValue = 
StringValue.of("batch_id_123"); - StringValue appendLogTableName = StringValue.of("appeng_log_table_name"); + StringValue appendLogTableName = StringValue.of("APPEND_LOG_TABLE_NAME"); StringValue batchStatusValue = StringValue.of(IngestStatus.SUCCEEDED.toString()); StringValue batchLineageValue = StringValue.of("my_lineage_value"); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java index 82bd262d59e..bc3d0919e2c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java @@ -31,7 +31,7 @@ public String getExpectedSqlForAppendMetadataUpperCase() { return "INSERT INTO APPENDLOG_BATCH_METADATA " + "(`BATCH_ID`, `TABLE_NAME`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`, `BATCH_SOURCE_INFO`) " + - "(SELECT 'batch_id_123','appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java index df4c1a12389..27cf900634e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java @@ -30,7 +30,7 @@ public String getExpectedSqlForAppendMetadata() public String getExpectedSqlForAppendMetadataUpperCase() { return "INSERT INTO APPENDLOG_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() From 0a5b14a6658c1e415e209f0fb0d5e985ebce0820 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 14 Sep 2023 15:26:31 +0800 Subject: [PATCH 39/57] Clean up --- .../common/AvroFileFormatAbstract.java | 6 +++ .../common/CsvFileFormatAbstract.java | 6 +++ .../components/common/FileFormat.java | 2 + .../components/common/FileFormatVisitor.java | 26 +++++++++++ .../common/JsonFileFormatAbstract.java | 6 +++ 
.../common/ParquetFileFormatAbstract.java | 6 +++ .../components/planner/BulkLoadPlanner.java | 4 +- .../StagedFilesDatasetReferenceVisitor.java | 46 +++++++++++++++---- 8 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java index d81f1212b48..647d5ea9152 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java @@ -31,4 +31,10 @@ default String getFormatName() { return "AVRO"; } + + @Override + default T accept(FileFormatVisitor visitor) + { + return visitor.visitAvroFileFormat(this); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java index 2fd1de0fcea..d9f6e7e4138 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java @@ -47,4 +47,10 @@ default String getFormatName() { return "CSV"; } + + @Override + default T accept(FileFormatVisitor visitor) + { + return visitor.visitCsvFileFormat(this); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java index 180b38eae1b..3214bc18ed5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java @@ -17,4 +17,6 @@ public interface FileFormat { String getFormatName(); + + T accept(FileFormatVisitor visitor); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java new file mode 100644 index 00000000000..78a755be835 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java @@ -0,0 +1,26 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.common; + +public interface FileFormatVisitor +{ + T visitCsvFileFormat(CsvFileFormatAbstract csvFileFormat); + + T visitJsonFileFormat(JsonFileFormatAbstract jsonFileFormat); + + T visitAvroFileFormat(AvroFileFormatAbstract avroFileFormat); + + T visitParquetFileFormat(ParquetFileFormatAbstract parquetFileFormat); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java index b10d531f308..590cbcc3f6c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java @@ -37,4 +37,10 @@ default String getFormatName() { return "JSON"; } + + @Override + default T accept(FileFormatVisitor visitor) + { + return visitor.visitJsonFileFormat(this); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java index 4d0dbac3579..c74d0eaad37 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java @@ -31,4 +31,10 @@ default String getFormatName() { return "PARQUET"; } + + @Override + default T accept(FileFormatVisitor visitor) + { + return visitor.visitParquetFileFormat(this); + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 629aabdc34b..97756968e7e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -72,13 +72,13 @@ class BulkLoadPlanner extends Planner allowExtraFieldsWhileCopying = capabilities.contains(Capability.ALLOW_EXTRA_FIELDS_WHILE_COPYING); if (!allowExtraFieldsWhileCopying) { - tempDataset = DatasetDefinition.builder() + tempDataset = datasets.tempDataset().orElse(DatasetDefinition.builder() .schema(datasets.stagingDataset().schema()) .database(datasets.mainDataset().datasetReference().database()) 
.group(datasets.mainDataset().datasetReference().group()) .name(datasets.mainDataset().datasetReference().name() + UNDERSCORE + TEMP_DATASET_BASE_NAME) .alias(TEMP_DATASET_BASE_NAME) - .build(); + .build()); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java index 5cc0a42174a..7ff25fac70b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -14,9 +14,12 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; -import org.finos.legend.engine.persistence.components.common.CsvFileFormat; +import org.finos.legend.engine.persistence.components.common.AvroFileFormatAbstract; +import org.finos.legend.engine.persistence.components.common.CsvFileFormatAbstract; import org.finos.legend.engine.persistence.components.common.FileFormat; -import org.finos.legend.engine.persistence.components.common.JsonFileFormat; +import org.finos.legend.engine.persistence.components.common.FileFormatVisitor; +import org.finos.legend.engine.persistence.components.common.JsonFileFormatAbstract; 
+import org.finos.legend.engine.persistence.components.common.ParquetFileFormatAbstract; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; @@ -42,9 +45,24 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu FileFormat fileFormat = datasetProperties.fileFormat(); loadOptionsMap.put("uris", "[" + String.join(",", datasetProperties.files()) + "]"); loadOptionsMap.put("format", fileFormat.getFormatName()); - if (fileFormat instanceof CsvFileFormat) + fileFormat.accept(new RetrieveLoadOptions(loadOptionsMap)); + prev.push(loadOptionsMap); + + return new VisitorResult(null); + } + + private static class RetrieveLoadOptions implements FileFormatVisitor + { + private Map loadOptionsMap; + + RetrieveLoadOptions(Map loadOptionsMap) + { + this.loadOptionsMap = loadOptionsMap; + } + + @Override + public Void visitCsvFileFormat(CsvFileFormatAbstract csvFileFormat) { - CsvFileFormat csvFileFormat = (CsvFileFormat) fileFormat; csvFileFormat.fieldDelimiter().ifPresent(property -> loadOptionsMap.put("field_delimiter", property)); csvFileFormat.encoding().ifPresent(property -> loadOptionsMap.put("encoding", property)); csvFileFormat.nullMarker().ifPresent(property -> loadOptionsMap.put("null_marker", property)); @@ -52,15 +70,27 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu csvFileFormat.skipLeadingRows().ifPresent(property -> loadOptionsMap.put("skip_leading_rows", property.toString())); csvFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property.toString())); csvFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); + return null; } - else if (fileFormat instanceof 
JsonFileFormat) + + @Override + public Void visitJsonFileFormat(JsonFileFormatAbstract jsonFileFormat) { - JsonFileFormat jsonFileFormat = (JsonFileFormat) fileFormat; jsonFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property.toString())); jsonFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); + return null; } - prev.push(loadOptionsMap); - return new VisitorResult(null); + @Override + public Void visitAvroFileFormat(AvroFileFormatAbstract avroFileFormat) + { + return null; + } + + @Override + public Void visitParquetFileFormat(ParquetFileFormatAbstract parquetFileFormat) + { + return null; + } } } From 998b78f0015914c9c59006ee118edf48e63923f5 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 14 Sep 2023 17:13:45 +0800 Subject: [PATCH 40/57] Add basic tests for bulk load for big query --- .../components/planner/BulkLoadPlanner.java | 6 +- .../StagedFilesDatasetReferenceVisitor.java | 15 +- .../schemaops/statements/CopyStatement.java | 2 + .../statements/SelectFromFileStatement.java | 55 ++++- .../components/ingestmode/BulkLoadTest.java | 209 ++++++++++++++++++ .../ingestmode/bulkload/BulkLoadTest.java | 2 +- 6 files changed, 266 insertions(+), 23 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 97756968e7e..35cc05bc9c6 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -76,7 +76,7 @@ class BulkLoadPlanner extends Planner .schema(datasets.stagingDataset().schema()) .database(datasets.mainDataset().datasetReference().database()) .group(datasets.mainDataset().datasetReference().group()) - .name(datasets.mainDataset().datasetReference().name() + UNDERSCORE + TEMP_DATASET_BASE_NAME) + .name(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)) + UNDERSCORE + TEMP_DATASET_BASE_NAME) .alias(TEMP_DATASET_BASE_NAME) .build()); } @@ -134,12 +134,12 @@ private LogicalPlan buildLogicalPlanForIngestUsingCopyAndInsert(Resources resour List fieldsToSelectFromStage = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); List fieldsToInsertIntoTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelectFromStage).build(); - operations.add(Copy.of(mainDataset(), selectStage, fieldsToInsertIntoTemp)); + operations.add(Copy.of(tempDataset, selectStage, fieldsToInsertIntoTemp)); // Operation 2: Transfer from temp table into target table, adding extra columns at the same time List fieldsToSelectFromTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); - List fieldsToInsertIntoMain = new ArrayList<>(mainDataset().schemaReference().fieldValues()); + List fieldsToInsertIntoMain = new ArrayList<>(tempDataset.schemaReference().fieldValues()); if (ingestMode().generateDigest()) { diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java index 7ff25fac70b..c24c3bb7128 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -41,21 +41,22 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu } BigQueryStagedFilesDatasetProperties datasetProperties = (BigQueryStagedFilesDatasetProperties) current.properties(); - Map loadOptionsMap = new HashMap<>(); + Map loadOptionsMap = new HashMap<>(); FileFormat fileFormat = datasetProperties.fileFormat(); - loadOptionsMap.put("uris", "[" + String.join(",", datasetProperties.files()) + "]"); loadOptionsMap.put("format", fileFormat.getFormatName()); fileFormat.accept(new RetrieveLoadOptions(loadOptionsMap)); prev.push(loadOptionsMap); + prev.push(datasetProperties.files()); + return new VisitorResult(null); } private static class RetrieveLoadOptions implements FileFormatVisitor { - private Map loadOptionsMap; + private Map loadOptionsMap; - RetrieveLoadOptions(Map loadOptionsMap) + RetrieveLoadOptions(Map loadOptionsMap) { 
this.loadOptionsMap = loadOptionsMap; } @@ -67,8 +68,8 @@ public Void visitCsvFileFormat(CsvFileFormatAbstract csvFileFormat) csvFileFormat.encoding().ifPresent(property -> loadOptionsMap.put("encoding", property)); csvFileFormat.nullMarker().ifPresent(property -> loadOptionsMap.put("null_marker", property)); csvFileFormat.quote().ifPresent(property -> loadOptionsMap.put("quote", property)); - csvFileFormat.skipLeadingRows().ifPresent(property -> loadOptionsMap.put("skip_leading_rows", property.toString())); - csvFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property.toString())); + csvFileFormat.skipLeadingRows().ifPresent(property -> loadOptionsMap.put("skip_leading_rows", property)); + csvFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); csvFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); return null; } @@ -76,7 +77,7 @@ public Void visitCsvFileFormat(CsvFileFormatAbstract csvFileFormat) @Override public Void visitJsonFileFormat(JsonFileFormatAbstract jsonFileFormat) { - jsonFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property.toString())); + jsonFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); jsonFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); return null; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java index 
f8e9ae4aa0e..e447bfac457 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java @@ -45,6 +45,8 @@ public void genSql(StringBuilder builder) throws SqlDomException builder.append(WHITE_SPACE); builder.append(OVERWRITE.get()); builder.append(WHITE_SPACE); + table.genSqlWithoutAlias(builder); + builder.append(WHITE_SPACE); selectFromFileStatement.genSql(builder); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java index c80a1c6e3c3..6292ebdf73f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java @@ -27,14 +27,18 @@ import 
static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.ASSIGNMENT_OPERATOR; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_SQUARE_BRACKET; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.COMMA; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.EMPTY; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_SQUARE_BRACKET; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; public class SelectFromFileStatement extends SelectExpression { private final List columns; - private Map loadOptions; + private List files; + private Map loadOptions; public SelectFromFileStatement() { @@ -52,31 +56,54 @@ public void genSql(StringBuilder builder) throws SqlDomException validate(); builder.append(OPEN_PARENTHESIS); - SqlGen.genSqlList(builder, columns, WHITE_SPACE, COMMA); + SqlGen.genSqlList(builder, columns, EMPTY, COMMA); builder.append(CLOSING_PARENTHESIS); builder.append(WHITE_SPACE); builder.append(Clause.FROM_FILES.get()); builder.append(WHITE_SPACE); - if (loadOptions != null && loadOptions.size() > 0) + builder.append(OPEN_PARENTHESIS); + builder.append("uris"); + builder.append(ASSIGNMENT_OPERATOR); + builder.append(OPEN_SQUARE_BRACKET); + for (int ctr = 0; ctr < files.size(); ctr++) { - builder.append(OPEN_PARENTHESIS); + builder.append(SqlGenUtils.singleQuote(files.get(ctr))); + if (ctr < (files.size() - 1)) + { + builder.append(COMMA); + } + } + builder.append(CLOSING_SQUARE_BRACKET); - int counter = 0; + if (loadOptions != null && 
loadOptions.size() > 0) + { + builder.append(COMMA); + builder.append(WHITE_SPACE); + int ctr = 0; for (String option : loadOptions.keySet()) { - counter++; + ctr++; builder.append(option); builder.append(ASSIGNMENT_OPERATOR); - builder.append(SqlGenUtils.singleQuote(loadOptions.get(option))); - if (counter < columns.size()) + if (loadOptions.get(option) instanceof String) + { + builder.append(SqlGenUtils.singleQuote(loadOptions.get(option))); + } + else + { + // number + builder.append(loadOptions.get(option)); + } + + if (ctr < loadOptions.size()) { builder.append(COMMA + WHITE_SPACE); } } - builder.append(CLOSING_PARENTHESIS); } + builder.append(CLOSING_PARENTHESIS); } @Override @@ -88,15 +115,19 @@ public void push(Object node) } if (node instanceof Map) { - loadOptions = (Map) node; + loadOptions = (Map) node; + } + if (node instanceof List) + { + files = (List) node; } } void validate() throws SqlDomException { - if (!loadOptions.containsKey("uris")) + if (files == null || files.isEmpty()) { - throw new SqlDomException("uris are mandatory for loading from files"); + throw new SqlDomException("files are mandatory for loading from files"); } if (!loadOptions.containsKey("format")) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java new file mode 100644 index 00000000000..3d40c64054d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -0,0 +1,209 @@ +// Copyright 2023 Goldman Sachs 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.ingestmode; + +import org.finos.legend.engine.persistence.components.common.CsvFileFormat; +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.StatisticName; +import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; +import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; +import 
org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.time.Clock; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_DELETED; +import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; +import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_TERMINATED; +import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_UPDATED; + +public class BulkLoadTest +{ + private static final String APPEND_TIME = "append_time"; + private static final String DIGEST = "digest"; + private static final String DIGEST_UDF = "LAKEHOUSE_MD5"; + private static final String LINEAGE = "lake_lineage"; + private static final String col_int = "col_int"; + private static final String col_string = "col_string"; + private static final String col_decimal = "col_decimal"; + private static final String col_datetime = "col_datetime"; + private static final String col_variant = "col_variant"; + + private static Field col1 = Field.builder() + .name(col_int) + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .primaryKey(true) + .build(); + private static Field col2 = Field.builder() + .name(col_string) + .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) + .build(); + private static Field col3 = Field.builder() + .name(col_decimal) + .type(FieldType.of(DataType.DECIMAL, 5, 2)) + .build(); + private static Field col4 = Field.builder() + .name(col_datetime) + .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) + .build(); + + private static Field col5 = Field.builder() + 
.name(col_variant) + .type(FieldType.of(DataType.VARIANT, Optional.empty(), Optional.empty())) + .build(); + + private List filesList = Arrays.asList("/path/xyz/file1.csv", "/path/xyz/file2.csv"); + + protected final ZonedDateTime fixedZonedDateTime_2000_01_01 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); + protected final Clock fixedClock_2000_01_01 = Clock.fixed(fixedZonedDateTime_2000_01_01.toInstant(), ZoneOffset.UTC); + + @Test + public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() + { + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(false) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + .fileFormat(CsvFileFormat.builder().build()) + .addAllFiles(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + + "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`append_time` DATETIME)"; + + String expectedCopySql = "LOAD DATA OVERWRITE 
`my_db`.`my_name_legend_persistence_temp` " + + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + + "FROM FILES (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; + + String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `append_time`) " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); + Assertions.assertEquals(expectedInsertSql, ingestSql.get(1)); + + Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + } + + @Test + public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() + { + BulkLoad bulkLoad = BulkLoad.builder() + .generateDigest(false) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + .fileFormat(CsvFileFormat.builder() + .encoding("UTF8") + .maxBadRecords(100L) + .nullMarker("NULL") + .quote("'") + .compression("GZIP") + .fieldDelimiter(",") + .skipLeadingRows(1L) + 
.build()) + .addAllFiles(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + + "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`append_time` DATETIME)"; + + String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + + "FROM FILES " + + "(uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], max_bad_records=100, quote=''', skip_leading_rows=1, format='CSV', encoding='UTF8', compression='GZIP', field_delimiter=',', null_marker='NULL')"; + + String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `append_time`) " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + + 
Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); + Assertions.assertEquals(expectedInsertSql, ingestSql.get(1)); + + Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index feea1e4f3d4..0ba86721089 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -100,7 +100,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() .fileFormat(CsvFileFormat.builder().build()) - .addAllFiles(Collections.singletonList(filePath)).build()) + .addAllFiles(Collections.singletonList(filePath)).build()) 
.schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); From c1a1e5456dde40db1f705ef28c3d11b3edf17116 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Fri, 15 Sep 2023 11:40:36 +0800 Subject: [PATCH 41/57] Refactor Code to rename AppendLog to BulkLoad --- .../components/common/DatasetsAbstract.java | 4 +- .../datasets/DatasetCaseConverter.java | 24 +++++------ .../datasets/DatasetsCaseConverter.java | 6 +-- .../values/BulkLoadBatchIdValueAbstract.java | 31 +++++++++++++ .../BulkLoadBatchStatusValueAbstract.java | 31 +++++++++++++ .../components/planner/BulkLoadPlanner.java | 24 +++++------ .../components/planner/Planner.java | 10 ++--- .../components/planner/Planners.java | 9 ---- ...a => BulkLoadMetadataDatasetAbstract.java} | 6 +-- ...aUtils.java => BulkLoadMetadataUtils.java} | 38 ++++++++-------- .../components/util/LogicalPlanUtils.java | 2 +- .../transformer/AbstractTransformer.java | 2 + .../transformer/LogicalPlanVisitor.java | 4 ++ .../components/transformer/Transformer.java | 4 ++ .../relational/ansi/AnsiSqlSink.java | 7 +++ .../visitors/BulkLoadBatchIdValueVisitor.java | 31 +++++++++++++ .../BulkLoadBatchStatusValueVisitor.java | 31 +++++++++++++ ...java => BulkLoadDatasetUtilsAnsiTest.java} | 14 +++--- ...est.java => BulkLoadDatasetUtilsTest.java} | 43 +++++++++---------- ... 
=> BulkLoadDatasetUtilsBigQueryTest.java} | 14 +++--- .../components/relational/api/ApiUtils.java | 6 +-- .../api/RelationalGeneratorAbstract.java | 12 +++--- .../api/RelationalIngestorAbstract.java | 9 ++-- .../ingestmode/bulkload/BulkLoadTest.java | 32 +++++++------- .../components/ingestmode/BulkLoadTest.java | 14 +++--- ...=> BulkLoadDatasetUtilsSnowflakeTest.java} | 14 +++--- 26 files changed, 271 insertions(+), 151 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchStatusValueAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/{AppendLogMetadataDatasetAbstract.java => BulkLoadMetadataDatasetAbstract.java} (95%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/{AppendLogMetadataUtils.java => BulkLoadMetadataUtils.java} (69%) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchStatusValueVisitor.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/{AppendLogDatasetUtilsAnsiTest.java => BulkLoadDatasetUtilsAnsiTest.java} (72%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/{AppendLogDatasetUtilsTest.java => BulkLoadDatasetUtilsTest.java} (59%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/{AppendLogDatasetUtilsBigQueryTest.java => BulkLoadDatasetUtilsBigQueryTest.java} (76%) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/{AppendLogDatasetUtilsSnowflakeTest.java => BulkLoadDatasetUtilsSnowflakeTest.java} (73%) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java index d8c58c6bb86..42b4fd7ef3c 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/DatasetsAbstract.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.common; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.immutables.value.Value.Immutable; @@ -42,7 +42,7 @@ public interface DatasetsAbstract Optional metadataDataset(); - Optional appendLogMetadataDataset(); + Optional bulkLoadMetadataDataset(); Optional tempDataset(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java index 6e7deb1adbf..bf3415061b5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetCaseConverter.java @@ -14,7 +14,7 @@ package org.finos.legend.engine.persistence.components.logicalplan.datasets; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; @@ -152,18 +152,18 @@ public MetadataDataset applyCaseOnMetadataDataset(MetadataDataset metadataDatase .build(); } - public AppendLogMetadataDataset applyCaseOnAppendLogMetadataDataset(AppendLogMetadataDataset appendLogMetadataDataset, Function strategy) + public BulkLoadMetadataDataset applyCaseOnBulkLoadMetadataDataset(BulkLoadMetadataDataset bulkLoadMetadataDataset, Function strategy) { - return AppendLogMetadataDataset.builder() - .database(appendLogMetadataDataset.database().map(strategy)) - .group(appendLogMetadataDataset.group().map(strategy)) - .name(strategy.apply(appendLogMetadataDataset.name())) - .batchIdField(strategy.apply(appendLogMetadataDataset.batchIdField())) - .tableNameField(strategy.apply(appendLogMetadataDataset.tableNameField())) - .batchStartTimeField(strategy.apply(appendLogMetadataDataset.batchStartTimeField())) - .batchEndTimeField(strategy.apply(appendLogMetadataDataset.batchEndTimeField())) - .batchStatusField(strategy.apply(appendLogMetadataDataset.batchStatusField())) - .batchSourceInfoField(strategy.apply(appendLogMetadataDataset.batchSourceInfoField())) + return BulkLoadMetadataDataset.builder() + .database(bulkLoadMetadataDataset.database().map(strategy)) + .group(bulkLoadMetadataDataset.group().map(strategy)) + .name(strategy.apply(bulkLoadMetadataDataset.name())) + 
.batchIdField(strategy.apply(bulkLoadMetadataDataset.batchIdField())) + .tableNameField(strategy.apply(bulkLoadMetadataDataset.tableNameField())) + .batchStartTimeField(strategy.apply(bulkLoadMetadataDataset.batchStartTimeField())) + .batchEndTimeField(strategy.apply(bulkLoadMetadataDataset.batchEndTimeField())) + .batchStatusField(strategy.apply(bulkLoadMetadataDataset.batchStatusField())) + .batchSourceInfoField(strategy.apply(bulkLoadMetadataDataset.batchSourceInfoField())) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java index 2fb6720c69c..af3f230336f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/datasets/DatasetsCaseConverter.java @@ -15,7 +15,7 @@ package org.finos.legend.engine.persistence.components.logicalplan.datasets; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; @@ -34,7 +34,7 @@ public 
Datasets applyCase(Datasets datasets, Function strategy) Optional tempWithDeleteIndicator = datasets.tempDatasetWithDeleteIndicator().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional stagingWithoutDuplicates = datasets.stagingDatasetWithoutDuplicates().map(dataset -> datasetCaseConverter.applyCaseOnDataset(dataset, strategy)); Optional metadata = Optional.ofNullable(datasetCaseConverter.applyCaseOnMetadataDataset(datasets.metadataDataset().orElseThrow(IllegalStateException::new), strategy)); - Optional appendLogMetadataDataset = Optional.ofNullable(datasetCaseConverter.applyCaseOnAppendLogMetadataDataset(datasets.appendLogMetadataDataset().orElseThrow(IllegalStateException::new), strategy)); + Optional bulkLoadMetadataDataset = Optional.ofNullable(datasetCaseConverter.applyCaseOnBulkLoadMetadataDataset(datasets.bulkLoadMetadataDataset().orElseThrow(IllegalStateException::new), strategy)); Optional lockInfo = Optional.ofNullable(datasetCaseConverter.applyCaseOnLockInfoDataset(datasets.lockInfoDataset().orElseThrow(IllegalStateException::new), strategy)); return Datasets.builder() @@ -44,7 +44,7 @@ public Datasets applyCase(Datasets datasets, Function strategy) .tempDatasetWithDeleteIndicator(tempWithDeleteIndicator) .stagingDatasetWithoutDuplicates(stagingWithoutDuplicates) .metadataDataset(metadata) - .appendLogMetadataDataset(appendLogMetadataDataset) + .bulkLoadMetadataDataset(bulkLoadMetadataDataset) .lockInfoDataset(lockInfo) .build(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java new file 
mode 100644 index 00000000000..9e7b5001aad --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java @@ -0,0 +1,31 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.logicalplan.values; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface BulkLoadBatchIdValueAbstract extends Value +{ + BulkLoadBatchIdValue INSTANCE = BulkLoadBatchIdValue.builder().build(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchStatusValueAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchStatusValueAbstract.java new file mode 100644 index 00000000000..5d61e9938d6 --- /dev/null +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchStatusValueAbstract.java @@ -0,0 +1,31 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.logicalplan.values; + +import static org.immutables.value.Value.Immutable; +import static org.immutables.value.Value.Style; + +@Immutable +@Style( + typeAbstract = "*Abstract", + typeImmutable = "*", + jdkOnly = true, + optionalAcceptNullable = true, + strictBuilder = true +) +public interface BulkLoadBatchStatusValueAbstract extends Value +{ + BulkLoadBatchStatusValue INSTANCE = BulkLoadBatchStatusValue.builder().build(); +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 70fd5fe1635..27cc89caa0f 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -30,7 +30,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; import org.finos.legend.engine.persistence.components.logicalplan.values.All; -import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; @@ -44,8 +43,9 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataUtils; +import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataUtils; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; @@ -59,7 +59,7 @@ class BulkLoadPlanner extends Planner private StagedFilesDataset 
stagedFilesDataset; - private AppendLogMetadataDataset appendLogMetadataDataset; + private BulkLoadMetadataDataset bulkLoadMetadataDataset; BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions) { @@ -72,7 +72,7 @@ class BulkLoadPlanner extends Planner } stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); - appendLogMetadataDataset = appendLogMetadataDataset().orElseThrow(IllegalStateException::new); + bulkLoadMetadataDataset = bulkLoadMetadataDataset().orElseThrow(IllegalStateException::new); } @Override @@ -92,7 +92,7 @@ public LogicalPlan buildLogicalPlanForIngest(Resources resources, Set operations = new ArrayList<>(); operations.add(Create.of(true, mainDataset())); - operations.add(Create.of(true, appendLogMetadataDataset.get())); + operations.add(Create.of(true, bulkLoadMetadataDataset.get())); return LogicalPlan.of(operations); } @@ -125,14 +125,10 @@ public LogicalPlan buildLogicalPlanForPostActions(Resources resources) @Override public LogicalPlan buildLogicalPlanForMetadataIngest(Resources resources) { - AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); + BulkLoadMetadataUtils bulkLoadMetadataUtils = new BulkLoadMetadataUtils(bulkLoadMetadataDataset); String batchSourceInfo = jsonifyBatchSourceInfo(stagedFilesDataset.stagedFilesDatasetProperties()); - - StringValue appendDatasetName = StringValue.of(mainDataset().datasetReference().name()); - StringValue batchIdValue = StringValue.of(options().appendBatchIdValue().orElseThrow(IllegalStateException::new)); - StringValue appendBatchStatusPattern = StringValue.of(options().appendBatchStatusPattern().orElseThrow(IllegalStateException::new)); - - Insert insertMetaData = appendLogMetadataUtils.insertMetaData(batchIdValue, appendDatasetName, BatchStartTimestamp.INSTANCE, BatchEndTimestamp.INSTANCE, appendBatchStatusPattern, StringValue.of(batchSourceInfo)); + StringValue datasetName = 
StringValue.of(mainDataset().datasetReference().name()); + Insert insertMetaData = bulkLoadMetadataUtils.insertMetaData(datasetName, StringValue.of(batchSourceInfo)); return LogicalPlan.of(Arrays.asList(insertMetaData)); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index 00dc131ffc9..dbc17287cbd 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -31,7 +31,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestampAbstract; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.Capability; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.LockInfoUtils; @@ -97,10 +97,6 @@ default boolean enableConcurrentSafety() { return false; } - - Optional appendBatchIdValue(); - - Optional appendBatchStatusPattern(); } private final Datasets datasets; @@ -137,9 +133,9 @@ 
protected Optional metadataDataset() return datasets.metadataDataset(); } - protected Optional appendLogMetadataDataset() + protected Optional bulkLoadMetadataDataset() { - return datasets.appendLogMetadataDataset(); + return datasets.bulkLoadMetadataDataset(); } protected Optional lockInfoDataset() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java index 48641976f0c..c6949395af4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planners.java @@ -106,15 +106,6 @@ public Planner visitBitemporalDelta(BitemporalDeltaAbstract bitemporalDelta) @Override public Planner visitBulkLoad(BulkLoadAbstract bulkLoad) { - // Validation: - if (!plannerOptions.appendBatchIdValue().isPresent()) - { - throw new IllegalArgumentException("appendBatchIdValue is mandatory for BulkLoad Ingest mode"); - } - if (!plannerOptions.appendBatchStatusPattern().isPresent()) - { - throw new IllegalArgumentException("appendBatchStatusPattern is mandatory for BulkLoad Ingest mode"); - } return new BulkLoadPlanner(datasets, (BulkLoad) bulkLoad, plannerOptions); } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataDatasetAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataDatasetAbstract.java similarity index 95% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataDatasetAbstract.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataDatasetAbstract.java index 7798d13a34b..65054f86e39 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataDatasetAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataDatasetAbstract.java @@ -23,7 +23,7 @@ import java.util.Optional; -import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.DEFAULT_APPENDLOG_META_TABLE; +import static org.finos.legend.engine.persistence.components.util.LogicalPlanUtils.DEFAULT_BULK_LOAD_META_TABLE; import static org.immutables.value.Value.Immutable; import static org.immutables.value.Value.Style; import static org.immutables.value.Value.Default; @@ -37,7 +37,7 @@ optionalAcceptNullable = true, strictBuilder = true ) -public interface AppendLogMetadataDatasetAbstract +public 
interface BulkLoadMetadataDatasetAbstract { Optional database(); @@ -47,7 +47,7 @@ public interface AppendLogMetadataDatasetAbstract @Default default String name() { - return DEFAULT_APPENDLOG_META_TABLE; + return DEFAULT_BULK_LOAD_META_TABLE; } @Default diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataUtils.java similarity index 69% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataUtils.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataUtils.java index 08198a0bb36..161a25e345b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/AppendLogMetadataUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataUtils.java @@ -24,19 +24,21 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; +import 
org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; import java.util.ArrayList; import java.util.List; -public class AppendLogMetadataUtils +public class BulkLoadMetadataUtils { - private final AppendLogMetadataDataset appendLogMetadataDataset; + private final BulkLoadMetadataDataset bulkLoadMetadataDataset; private final Dataset dataset; - public AppendLogMetadataUtils(AppendLogMetadataDataset appendLogMetadataDataset) + public BulkLoadMetadataUtils(BulkLoadMetadataDataset bulkLoadMetadataDataset) { - this.appendLogMetadataDataset = appendLogMetadataDataset; - this.dataset = appendLogMetadataDataset.get(); + this.bulkLoadMetadataDataset = bulkLoadMetadataDataset; + this.dataset = bulkLoadMetadataDataset.get(); } /* @@ -45,37 +47,35 @@ INSERT INTO batch_metadata ("batchIdField", "tableNameField", "batchStartTimeFie (SELECT '','','{BATCH_START_TIMESTAMP_PLACEHOLDER}','{BATCH_END_TIMESTAMP_PLACEHOLDER}', '',''); */ - public Insert insertMetaData(StringValue batchIdValue, StringValue appendLogTableName, - BatchStartTimestamp batchStartTimestamp, BatchEndTimestamp batchEndTimestamp, - StringValue batchStatusValue, StringValue batchSourceInfoValue) + public Insert insertMetaData(StringValue tableNameValue, StringValue batchSourceInfoValue) { DatasetReference metaTableRef = this.dataset.datasetReference(); - FieldValue batchId = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchIdField()).build(); - FieldValue tableName = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.tableNameField()).build(); + FieldValue batchId = FieldValue.builder().datasetRef(metaTableRef).fieldName(bulkLoadMetadataDataset.batchIdField()).build(); + FieldValue tableName = FieldValue.builder().datasetRef(metaTableRef).fieldName(bulkLoadMetadataDataset.tableNameField()).build(); - FieldValue 
batchStartTs = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchStartTimeField()).build(); - FieldValue batchEndTs = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchEndTimeField()).build(); + FieldValue batchStartTs = FieldValue.builder().datasetRef(metaTableRef).fieldName(bulkLoadMetadataDataset.batchStartTimeField()).build(); + FieldValue batchEndTs = FieldValue.builder().datasetRef(metaTableRef).fieldName(bulkLoadMetadataDataset.batchEndTimeField()).build(); - FieldValue batchStatus = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchStatusField()).build(); - FieldValue batchSourceInfo = FieldValue.builder().datasetRef(metaTableRef).fieldName(appendLogMetadataDataset.batchSourceInfoField()).build(); + FieldValue batchStatus = FieldValue.builder().datasetRef(metaTableRef).fieldName(bulkLoadMetadataDataset.batchStatusField()).build(); + FieldValue batchSourceInfo = FieldValue.builder().datasetRef(metaTableRef).fieldName(bulkLoadMetadataDataset.batchSourceInfoField()).build(); List metaInsertFields = new ArrayList<>(); List metaSelectFields = new ArrayList<>(); metaInsertFields.add(batchId); - metaSelectFields.add(batchIdValue); + metaSelectFields.add(BulkLoadBatchIdValue.INSTANCE); metaInsertFields.add(tableName); - metaSelectFields.add(appendLogTableName); + metaSelectFields.add(tableNameValue); metaInsertFields.add(batchStartTs); - metaSelectFields.add(batchStartTimestamp); + metaSelectFields.add(BatchStartTimestamp.INSTANCE); metaInsertFields.add(batchEndTs); - metaSelectFields.add(batchEndTimestamp); + metaSelectFields.add(BatchEndTimestamp.INSTANCE); metaInsertFields.add(batchStatus); - metaSelectFields.add(batchStatusValue); + metaSelectFields.add(BulkLoadBatchStatusValue.INSTANCE); metaInsertFields.add(batchSourceInfo); metaSelectFields.add(ParseJsonFunction.builder().jsonString(batchSourceInfoValue).build()); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java index 504720e5212..7ce63a97f29 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/LogicalPlanUtils.java @@ -74,7 +74,7 @@ public class LogicalPlanUtils { public static final String INFINITE_BATCH_TIME = "9999-12-31 23:59:59"; public static final String DEFAULT_META_TABLE = "batch_metadata"; - public static final String DEFAULT_APPENDLOG_META_TABLE = "appendlog_batch_metadata"; + public static final String DEFAULT_BULK_LOAD_META_TABLE = "bulk_load_batch_metadata"; public static final String DATA_SPLIT_LOWER_BOUND_PLACEHOLDER = "{DATA_SPLIT_LOWER_BOUND_PLACEHOLDER}"; public static final String DATA_SPLIT_UPPER_BOUND_PLACEHOLDER = "{DATA_SPLIT_UPPER_BOUND_PLACEHOLDER}"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java index 10470ca0d56..e9cab94d2d7 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java @@ -69,6 +69,8 @@ protected VisitorContext createContext(TransformOptions options) .batchStartTimestamp(options.batchStartTimestampValue()) .batchIdPattern(options.batchIdPattern()) .infiniteBatchIdValue(options.infiniteBatchIdValue()) + .bulkLoadBatchIdValue(options.bulkLoadBatchIdValue()) + .bulkLoadBatchStatusPattern(options.bulkLoadBatchStatusPattern()) .addAllOptimizers(options.optimizers()) .quoteIdentifier(sink.quoteIdentifier()) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java index 4712964aa0e..7b59312d55f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java @@ -51,6 +51,10 @@ interface VisitorContextAbstract Optional infiniteBatchIdValue(); + Optional bulkLoadBatchIdValue(); + + Optional 
bulkLoadBatchStatusPattern(); + List optimizers(); String quoteIdentifier(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java index 142e2e2c2d4..be6f7a440d6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java @@ -57,6 +57,10 @@ public Clock executionTimestampClock() public abstract Optional infiniteBatchIdValue(); + public abstract Optional bulkLoadBatchIdValue(); + + public abstract Optional bulkLoadBatchStatusPattern(); + public abstract List optimizers(); @Default diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index d16de97cac7..63a2d109bb4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -61,6 +61,8 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchIdValue; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; +import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Case; import org.finos.legend.engine.persistence.components.logicalplan.values.DatetimeValue; import org.finos.legend.engine.persistence.components.logicalplan.values.DiffBinaryValueOperator; @@ -93,6 +95,8 @@ import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BatchEndTimestampVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BatchIdValueVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BatchStartTimestampVisitor; +import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BulkLoadBatchIdValueVisitor; +import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BulkLoadBatchStatusValueVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.CaseVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.DatasetAdditionalPropertiesVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.DatasetDefinitionVisitor; @@ -231,6 +235,9 @@ public class AnsiSqlSink extends RelationalSink logicalPlanVisitorByClass.put(Show.class, new ShowVisitor()); 
logicalPlanVisitorByClass.put(BatchIdValue.class, new BatchIdValueVisitor()); logicalPlanVisitorByClass.put(InfiniteBatchIdValue.class, new InfiniteBatchIdValueVisitor()); + logicalPlanVisitorByClass.put(BulkLoadBatchIdValue.class, new BulkLoadBatchIdValueVisitor()); + logicalPlanVisitorByClass.put(BulkLoadBatchStatusValue.class, new BulkLoadBatchStatusValueVisitor()); + LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); INSTANCE = new AnsiSqlSink(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java new file mode 100644 index 00000000000..faf24aac182 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java @@ -0,0 +1,31 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors; + +import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.StringValue; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class BulkLoadBatchIdValueVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, BulkLoadBatchIdValue current, VisitorContext context) + { + prev.push(new StringValue(context.bulkLoadBatchIdValue().orElseThrow(IllegalStateException::new), context.quoteIdentifier())); + return new VisitorResult(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchStatusValueVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchStatusValueVisitor.java new file mode 100644 index 00000000000..e6665da3c69 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchStatusValueVisitor.java @@ -0,0 +1,31 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors; + +import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.StringValue; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +public class BulkLoadBatchStatusValueVisitor implements LogicalPlanVisitor +{ + @Override + public VisitorResult visit(PhysicalPlanNode prev, BulkLoadBatchStatusValue current, VisitorContext context) + { + prev.push(new StringValue(context.bulkLoadBatchStatusPattern().orElseThrow(IllegalStateException::new), context.quoteIdentifier())); + return new VisitorResult(); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java similarity index 72% rename from 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java index 6236a508466..af290f9c870 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsAnsiTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java @@ -17,20 +17,20 @@ import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; -public class AppendLogDatasetUtilsAnsiTest extends AppendLogDatasetUtilsTest +public class BulkLoadDatasetUtilsAnsiTest extends BulkLoadDatasetUtilsTest { - public String getExpectedSqlForAppendMetadata() + public String getExpectedSqlForMetadata() { - return "INSERT INTO appendlog_batch_metadata " + + return "INSERT INTO bulk_load_batch_metadata " + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\")" + - " (SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + " (SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; } - public String getExpectedSqlForAppendMetadataUpperCase() + public String 
getExpectedSqlForMetadataUpperCase() { - return "INSERT INTO APPENDLOG_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + + "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java similarity index 59% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java index 50fe207b5bc..4a9f5c7a97c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java @@ -20,7 +20,6 @@ import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.SqlPlan; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; -import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.transformer.RelationalTransformer; import org.finos.legend.engine.persistence.components.transformer.TransformOptions; import org.junit.jupiter.api.Assertions; @@ -31,57 +30,55 @@ import java.time.ZonedDateTime; import java.util.List; -public abstract class AppendLogDatasetUtilsTest +public abstract class BulkLoadDatasetUtilsTest { private final ZonedDateTime executionZonedDateTime = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC); - private final TransformOptions transformOptions = TransformOptions.builder().executionTimestampClock(Clock.fixed(executionZonedDateTime.toInstant(), ZoneOffset.UTC)).build(); - - private AppendLogMetadataDataset appendLogMetadataDataset = AppendLogMetadataDataset.builder().build(); + private final TransformOptions transformOptions = TransformOptions + .builder() + .executionTimestampClock(Clock.fixed(executionZonedDateTime.toInstant(), ZoneOffset.UTC)) + .bulkLoadBatchIdValue("batch_id_123") + .bulkLoadBatchStatusPattern("") + .build(); + private BulkLoadMetadataDataset bulkLoadMetadataDataset = BulkLoadMetadataDataset.builder().build(); @Test - public void testInsertAppendMetadata() + public void testInsertMetadata() { - AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); - StringValue batchIdValue = StringValue.of("batch_id_123"); - StringValue 
appendLogTableName = StringValue.of("appeng_log_table_name"); - StringValue batchStatusValue = StringValue.of(IngestStatus.SUCCEEDED.toString()); + BulkLoadMetadataUtils bulkLoadMetadataUtils = new BulkLoadMetadataUtils(bulkLoadMetadataDataset); + StringValue bulkLoadTableName = StringValue.of("appeng_log_table_name"); StringValue batchLineageValue = StringValue.of("my_lineage_value"); - Insert operation = appendLogMetadataUtils.insertMetaData(batchIdValue, appendLogTableName, BatchStartTimestamp.INSTANCE, - BatchEndTimestampAbstract.INSTANCE, batchStatusValue, batchLineageValue); + Insert operation = bulkLoadMetadataUtils.insertMetaData(bulkLoadTableName, batchLineageValue); RelationalTransformer transformer = new RelationalTransformer(getRelationalSink(), transformOptions); LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = getExpectedSqlForAppendMetadata(); + String expectedSql = getExpectedSqlForMetadata(); Assertions.assertEquals(expectedSql, list.get(0)); } - public abstract String getExpectedSqlForAppendMetadata(); + public abstract String getExpectedSqlForMetadata(); @Test - public void testInsertAppendMetadataInUpperCase() + public void testInsertMetadataInUpperCase() { - AppendLogMetadataUtils appendLogMetadataUtils = new AppendLogMetadataUtils(appendLogMetadataDataset); - StringValue batchIdValue = StringValue.of("batch_id_123"); - StringValue appendLogTableName = StringValue.of("APPEND_LOG_TABLE_NAME"); - StringValue batchStatusValue = StringValue.of(IngestStatus.SUCCEEDED.toString()); + BulkLoadMetadataUtils bulkLoadMetadataUtils = new BulkLoadMetadataUtils(bulkLoadMetadataDataset); + StringValue bulkLoadTableName = StringValue.of("APPEND_LOG_TABLE_NAME"); StringValue batchLineageValue = StringValue.of("my_lineage_value"); - Insert operation = appendLogMetadataUtils.insertMetaData(batchIdValue, 
appendLogTableName, - BatchStartTimestamp.INSTANCE, BatchEndTimestampAbstract.INSTANCE, batchStatusValue, batchLineageValue); + Insert operation = bulkLoadMetadataUtils.insertMetaData(bulkLoadTableName, batchLineageValue); RelationalTransformer transformer = new RelationalTransformer(getRelationalSink(), transformOptions.withOptimizers(new UpperCaseOptimizer())); LogicalPlan logicalPlan = LogicalPlan.builder().addOps(operation).build(); SqlPlan physicalPlan = transformer.generatePhysicalPlan(logicalPlan); List list = physicalPlan.getSqlList(); - String expectedSql = getExpectedSqlForAppendMetadataUpperCase(); + String expectedSql = getExpectedSqlForMetadataUpperCase(); Assertions.assertEquals(expectedSql, list.get(0)); } - public abstract String getExpectedSqlForAppendMetadataUpperCase(); + public abstract String getExpectedSqlForMetadataUpperCase(); public abstract RelationalSink getRelationalSink(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java similarity index 76% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java index bc3d0919e2c..eb44efeaa44 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsBigQueryTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java @@ -17,21 +17,21 @@ import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; -public class AppendLogDatasetUtilsBigQueryTest extends AppendLogDatasetUtilsTest +public class BulkLoadDatasetUtilsBigQueryTest extends BulkLoadDatasetUtilsTest { - public String getExpectedSqlForAppendMetadata() + public String getExpectedSqlForMetadata() { - return "INSERT INTO appendlog_batch_metadata " + + return "INSERT INTO bulk_load_batch_metadata " + "(`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT 'batch_id_123','appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + "(SELECT 'batch_id_123','appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } - public String getExpectedSqlForAppendMetadataUpperCase() + public String getExpectedSqlForMetadataUpperCase() { - return "INSERT INTO APPENDLOG_BATCH_METADATA " + + return "INSERT INTO BULK_LOAD_BATCH_METADATA " + "(`BATCH_ID`, `TABLE_NAME`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`, `BATCH_SOURCE_INFO`) " + - "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + "(SELECT 
'batch_id_123','APPEND_LOG_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java index bb0ff837db3..fda329b17ee 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/ApiUtils.java @@ -22,7 +22,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetsCaseConverter; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.relational.CaseConversion; -import org.finos.legend.engine.persistence.components.util.AppendLogMetadataDataset; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.LockInfoDataset; import org.finos.legend.engine.persistence.components.util.MetadataDataset; @@ -47,12 +47,12 @@ public static Datasets enrichAndApplyCase(Datasets datasets, CaseConversion case { DatasetsCaseConverter converter = new DatasetsCaseConverter(); MetadataDataset metadataDataset = datasets.metadataDataset().orElse(MetadataDataset.builder().build()); - 
AppendLogMetadataDataset appendLogMetadataDataset = datasets.appendLogMetadataDataset().orElse(AppendLogMetadataDataset.builder().build()); + BulkLoadMetadataDataset bulkLoadMetadataDataset = datasets.bulkLoadMetadataDataset().orElse(BulkLoadMetadataDataset.builder().build()); LockInfoDataset lockInfoDataset = getLockInfoDataset(datasets); Datasets enrichedDatasets = datasets .withMetadataDataset(metadataDataset) .withLockInfoDataset(lockInfoDataset) - .withAppendLogMetadataDataset(appendLogMetadataDataset); + .withBulkLoadMetadataDataset(bulkLoadMetadataDataset); if (caseConversion == CaseConversion.TO_UPPER) { return converter.applyCase(enrichedDatasets, String::toUpperCase); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index 0200b626a98..8d53ae9a351 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -57,7 +57,7 @@ public abstract class RelationalGeneratorAbstract { - public static final String APPEND_BATCH_STATUS_PATTERN = "{APPEND_BATCH_STATUS_PLACEHOLDER}"; + public static final String BULK_LOAD_BATCH_STATUS_PATTERN = "{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}"; //---------- FLAGS ---------- @@ -113,12 
+113,12 @@ public boolean enableConcurrentSafety() public abstract Optional infiniteBatchIdValue(); - public abstract Optional appendBatchIdValue(); + public abstract Optional bulkLoadBatchIdValue(); @Default - public String appendBatchStatusPattern() + public String bulkLoadBatchStatusPattern() { - return APPEND_BATCH_STATUS_PATTERN; + return BULK_LOAD_BATCH_STATUS_PATTERN; } //---------- FIELDS ---------- @@ -136,8 +136,6 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) - .appendBatchIdValue(appendBatchIdValue()) - .appendBatchStatusPattern(appendBatchStatusPattern()) .build(); } @@ -149,6 +147,8 @@ protected TransformOptions transformOptions() .batchStartTimestampPattern(batchStartTimestampPattern()) .batchEndTimestampPattern(batchEndTimestampPattern()) .infiniteBatchIdValue(infiniteBatchIdValue()) + .bulkLoadBatchIdValue(bulkLoadBatchIdValue()) + .bulkLoadBatchStatusPattern(bulkLoadBatchStatusPattern()) .batchIdPattern(batchIdPattern()); relationalSink().optimizerForCaseConversion(caseConversion()).ifPresent(builder::addOptimizers); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index dfb4bbfe687..9127b946ccf 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -79,7 +79,7 @@ import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.TABLE_IS_NON_EMPTY; -import static org.finos.legend.engine.persistence.components.relational.api.RelationalGeneratorAbstract.APPEND_BATCH_STATUS_PATTERN; +import static org.finos.legend.engine.persistence.components.relational.api.RelationalGeneratorAbstract.BULK_LOAD_BATCH_STATUS_PATTERN; import static org.finos.legend.engine.persistence.components.transformer.Transformer.TransformOptionsAbstract.DATE_TIME_FORMATTER; @Immutable @@ -156,7 +156,7 @@ public Set schemaEvolutionCapabilitySet() return Collections.emptySet(); } - public abstract Optional appendBatchIdValue(); + public abstract Optional bulkLoadBatchIdValue(); //---------- FIELDS ---------- @@ -173,8 +173,6 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) - .appendBatchIdValue(appendBatchIdValue()) - .appendBatchStatusPattern(APPEND_BATCH_STATUS_PATTERN) .build(); } @@ -467,6 +465,7 @@ private void init(Datasets datasets) .batchStartTimestampPattern(BATCH_START_TS_PATTERN) .batchEndTimestampPattern(BATCH_END_TS_PATTERN) .batchIdPattern(BATCH_ID_PATTERN) + .bulkLoadBatchIdValue(bulkLoadBatchIdValue()) .build(); planner = Planners.get(enrichedDatasets, enrichedIngestMode, plannerOptions()); @@ -538,7 +537,7 @@ private List performBulkLoad(Datasets datasets, Transformer appendMetadata = h2Sink.executeQuery("select * from 
appendlog_batch_metadata").get(0); - verifyAppendMetadata(appendMetadata, filePath); + Map appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(0); + verifyBulkLoadMetadata(appendMetadata, filePath); } @Test @@ -194,7 +194,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .appendBatchIdValue("xyz123") + .bulkLoadBatchIdValue("xyz123") .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -228,8 +228,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); - Map appendMetadata = h2Sink.executeQuery("select * from appendlog_batch_metadata").get(0); - verifyAppendMetadata(appendMetadata, filePath); + Map appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(0); + verifyBulkLoadMetadata(appendMetadata, filePath); } @Test @@ -266,7 +266,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .appendBatchIdValue("xyz123") + .bulkLoadBatchIdValue("xyz123") .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -303,8 +303,8 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); - Map appendMetadata = h2Sink.executeQuery("select * from appendlog_batch_metadata").get(0); - verifyAppendMetadata(appendMetadata, filePath); + Map appendMetadata = h2Sink.executeQuery("select * from 
bulk_load_batch_metadata").get(0); + verifyBulkLoadMetadata(appendMetadata, filePath); } @Test @@ -341,7 +341,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .appendBatchIdValue("xyz123") + .bulkLoadBatchIdValue("xyz123") .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -380,8 +380,8 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER); executePlansAndVerifyForCaseConversion(ingestor, datasets, schema, expectedDataPath, expectedStats); - Map appendMetadata = h2Sink.executeQuery("select * from APPENDLOG_BATCH_METADATA").get(0); - verifyAppendMetadataForUpperCase(appendMetadata, filePath); + Map appendMetadata = h2Sink.executeQuery("select * from BULK_LOAD_BATCH_METADATA").get(0); + verifyBulkLoadMetadataForUpperCase(appendMetadata, filePath); } @Test @@ -443,7 +443,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) - .appendBatchIdValue("xyz123") + .bulkLoadBatchIdValue("xyz123") .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -505,13 +505,13 @@ RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions o .executionTimestampClock(executionTimestampClock) .cleanupStagingData(options.cleanupStagingData()) .collectStatistics(options.collectStatistics()) - .appendBatchIdValue("xyz123") + .bulkLoadBatchIdValue("xyz123") .enableConcurrentSafety(true) .caseConversion(caseConversion) .build(); } - private void verifyAppendMetadata(Map appendMetadata, String fileName) + private void verifyBulkLoadMetadata(Map appendMetadata, String fileName) { Assertions.assertEquals("xyz123", 
appendMetadata.get("batch_id")); Assertions.assertEquals("SUCCEEDED", appendMetadata.get("batch_status")); @@ -521,7 +521,7 @@ private void verifyAppendMetadata(Map appendMetadata, String fil Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_end_ts_utc").toString()); } - private void verifyAppendMetadataForUpperCase(Map appendMetadata, String fileName) + private void verifyBulkLoadMetadataForUpperCase(Map appendMetadata, String fileName) { Assertions.assertEquals("xyz123", appendMetadata.get("BATCH_ID")); Assertions.assertEquals("SUCCEEDED", appendMetadata.get("BATCH_STATUS")); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index cc15332194d..0010ab8e10c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -102,7 +102,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .appendBatchIdValue("batch123") + .bulkLoadBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -120,8 +120,8 @@ public void 
testBulkLoadWithDigestNotGeneratedColumnNumbersDerived() "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage)" + " on_error = 'ABORT_STATEMENT'"; - String expectedMetadataIngestSql = "INSERT INTO appendlog_batch_metadata (\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + - "(SELECT 'batch123','my_name','2000-01-01 00:00:00',SYSDATE(),'{APPEND_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + + "(SELECT 'batch123','my_name','2000-01-01 00:00:00',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -161,7 +161,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .ingestMode(bulkLoad) .relationalSink(SnowflakeSink.get()) .collectStatistics(true) - .appendBatchIdValue("batch123") + .bulkLoadBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -215,7 +215,7 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) - .appendBatchIdValue("batch123") + .bulkLoadBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -307,7 +307,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() .relationalSink(SnowflakeSink.get()) 
.collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .appendBatchIdValue("batch123") + .bulkLoadBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); @@ -347,7 +347,7 @@ public void testBulkLoadWithDigest() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .appendBatchIdValue("batch123") + .bulkLoadBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java similarity index 73% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java index 27cf900634e..dcaf59ffcde 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/AppendLogDatasetUtilsSnowflakeTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java @@ -17,20 +17,20 @@ import org.finos.legend.engine.persistence.components.relational.RelationalSink; import org.finos.legend.engine.persistence.components.relational.snowflake.SnowflakeSink; -public class AppendLogDatasetUtilsSnowflakeTest extends AppendLogDatasetUtilsTest +public class BulkLoadDatasetUtilsSnowflakeTest extends BulkLoadDatasetUtilsTest { - public String getExpectedSqlForAppendMetadata() + public String getExpectedSqlForMetadata() { - return "INSERT INTO appendlog_batch_metadata " + + return "INSERT INTO bulk_load_batch_metadata " + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + - "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; } - public String getExpectedSqlForAppendMetadataUpperCase() + public String getExpectedSqlForMetadataUpperCase() { - return "INSERT INTO APPENDLOG_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'SUCCEEDED',PARSE_JSON('my_lineage_value'))"; + return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + + "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() From 68f54d697366860c9be403aef3acd266ee68f2e9 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh 
Date: Fri, 15 Sep 2023 12:56:36 +0800 Subject: [PATCH 42/57] Add default bulkLoad Batchid --- .../api/RelationalGeneratorAbstract.java | 7 ++++++- .../api/RelationalIngestorAbstract.java | 7 ++++++- .../components/ingestmode/BulkLoadTest.java | 18 ++++++++++++++---- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index 8d53ae9a351..3ee9b546ec0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -45,6 +45,7 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.UUID; @Immutable @Style( @@ -113,7 +114,11 @@ public boolean enableConcurrentSafety() public abstract Optional infiniteBatchIdValue(); - public abstract Optional bulkLoadBatchIdValue(); + @Default + public String bulkLoadBatchIdValue() + { + return UUID.randomUUID().toString(); + } @Default public String bulkLoadBatchStatusPattern() diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index 9127b946ccf..207437cc54d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -75,6 +75,7 @@ import java.util.Arrays; import java.util.Set; import java.util.stream.Collectors; +import java.util.UUID; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD; @@ -156,7 +157,11 @@ public Set schemaEvolutionCapabilitySet() return Collections.emptySet(); } - public abstract Optional bulkLoadBatchIdValue(); + @Default + public String bulkLoadBatchIdValue() + { + return UUID.randomUUID().toString(); + } //---------- FIELDS ---------- diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 0010ab8e10c..32793a5c47d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -42,6 +42,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static org.finos.legend.engine.persistence.components.common.StatisticName.*; @@ -187,7 +189,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() } @Test - public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() + public void testBulkLoadWithUpperCaseConversionAndDefaultBatchId() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") @@ -215,7 +217,6 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) - .bulkLoadBatchIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -224,6 +225,15 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() List ingestSql = operations.ingestSql(); Map statsSql = operations.postIngestStatisticsSql(); + // Extract the generated UUID + Pattern pattern = Pattern.compile("[a-f0-9]{8}(?:-[a-f0-9]{4}){4}[a-f0-9]{8}"); + Matcher matcher = pattern.matcher(ingestSql.get(0)); + String uuid = ""; + if 
(matcher.find()) + { + uuid = matcher.group(); + } + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY," + "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" VARCHAR,\"APPEND_TIME\" DATETIME)"; String expectedIngestSql = "COPY INTO \"MY_DB\".\"MY_NAME\" " + @@ -231,13 +241,13 @@ public void testBulkLoadWithDigestGeneratedWithUpperCaseConversion() "FROM " + "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('COL_INT',legend_persistence_stage.$1,'COL_INTEGER',legend_persistence_stage.$2))," + - "'batch123','2000-01-01 00:00:00' " + + "'%s','2000-01-01 00:00:00' " + "FROM my_location (FILE_FORMAT => 'my_file_format', " + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); - Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals(String.format(expectedIngestSql, uuid), ingestSql.get(0)); Assertions.assertEquals("SELECT 0 as \"ROWSDELETED\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"ROWSTERMINATED\"", statsSql.get(ROWS_TERMINATED)); From cf6b4d092e915e0fedd174fd4fcf6b53c4ecf83e Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Fri, 15 Sep 2023 13:02:15 +0800 Subject: [PATCH 43/57] Refactor Append Log table name --- .../components/util/BulkLoadDatasetUtilsAnsiTest.java | 2 +- .../persistence/components/util/BulkLoadDatasetUtilsTest.java | 2 +- .../components/util/BulkLoadDatasetUtilsBigQueryTest.java | 2 +- .../components/util/BulkLoadDatasetUtilsSnowflakeTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java index af290f9c870..67a3337de07 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java @@ -30,7 +30,7 @@ public String getExpectedSqlForMetadata() public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT 'batch_id_123','BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java index 
4a9f5c7a97c..65e5861a277 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java @@ -65,7 +65,7 @@ public void testInsertMetadata() public void testInsertMetadataInUpperCase() { BulkLoadMetadataUtils bulkLoadMetadataUtils = new BulkLoadMetadataUtils(bulkLoadMetadataDataset); - StringValue bulkLoadTableName = StringValue.of("APPEND_LOG_TABLE_NAME"); + StringValue bulkLoadTableName = StringValue.of("BULK_LOAD_TABLE_NAME"); StringValue batchLineageValue = StringValue.of("my_lineage_value"); Insert operation = bulkLoadMetadataUtils.insertMetaData(bulkLoadTableName, batchLineageValue); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java index eb44efeaa44..efcf49965fc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java @@ -31,7 +31,7 
@@ public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA " + "(`BATCH_ID`, `TABLE_NAME`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`, `BATCH_SOURCE_INFO`) " + - "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT 'batch_id_123','BULK_LOAD_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java index dcaf59ffcde..8ad9c6351ef 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java @@ -30,7 +30,7 @@ public String getExpectedSqlForMetadata() public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','APPEND_LOG_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT 
'batch_id_123','BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() From 3e695485aa7cdbb4cb1515d6bd574bea019c3551 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 19 Sep 2023 12:23:12 +0800 Subject: [PATCH 44/57] Resolve conflicts --- .../components/planner/BulkLoadPlanner.java | 61 ++++++++++++++++--- .../components/ingestmode/BulkLoadTest.java | 24 +++++--- .../ingestmode/bulkload/BulkLoadTest.java | 28 +++++---- 3 files changed, 82 insertions(+), 31 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 5960a951879..58c1c9bb6bb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -68,7 +68,7 @@ class BulkLoadPlanner extends Planner private StagedFilesDataset stagedFilesDataset; private BulkLoadMetadataDataset bulkLoadMetadataDataset; - BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions) + BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions, Set capabilities) { super(datasets, ingestMode, plannerOptions, capabilities); @@ -118,13 +118,58 @@ private LogicalPlan buildLogicalPlanForIngestUsingCopy(Resources resources) List fieldsToSelect = 
LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); - // Digest Generation - ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert)); + // Add digest + ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert, fieldsToSelect)); // Add batch_id field fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); fieldsToSelect.add(BulkLoadBatchIdValue.INSTANCE); + // Add auditing + if (ingestMode().auditing().accept(AUDIT_ENABLED)) + { + addAuditing(fieldsToInsert, fieldsToSelect); + } + + Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build(); + return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert))); + } + + private LogicalPlan buildLogicalPlanForIngestUsingCopyAndInsert(Resources resources) + { + List operations = new ArrayList<>(); + + + // Operation 1: Copy into a temp table + List fieldsToSelectFromStage = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); + List fieldsToInsertIntoTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); + Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelectFromStage).build(); + operations.add(Copy.of(tempDataset, selectStage, fieldsToInsertIntoTemp)); + + + // Operation 2: Transfer from temp table into target table, adding extra columns at the same time + List fieldsToSelectFromTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); + List fieldsToInsertIntoMain = new ArrayList<>(tempDataset.schemaReference().fieldValues()); + + // Add digest + ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), 
stagingDataset(), fieldsToSelectFromTemp, fieldsToInsertIntoMain, fieldsToSelectFromStage)); + + // Add batch_id field + fieldsToInsertIntoMain.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); + fieldsToSelectFromTemp.add(BulkLoadBatchIdValue.INSTANCE); + + // Add auditing + if (ingestMode().auditing().accept(AUDIT_ENABLED)) + { + addAuditing(fieldsToInsertIntoMain, fieldsToSelectFromTemp); + } + + operations.add(Insert.of(mainDataset(), Selection.builder().source(tempDataset).addAllFields(fieldsToSelectFromTemp).build(), fieldsToInsertIntoMain)); + + + return LogicalPlan.of(operations); + } + private void addAuditing(List fieldsToInsert, List fieldsToSelect) { BatchStartTimestamp batchStartTimestamp = BatchStartTimestamp.INSTANCE; @@ -133,10 +178,6 @@ private void addAuditing(List fieldsToInsert, List fieldsToSelect) fieldsToSelect.add(batchStartTimestamp); } - Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelect).build(); - return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert))); - } - @Override public LogicalPlan buildLogicalPlanForPreActions(Resources resources) { @@ -231,15 +272,17 @@ static class DigestGeneration implements DigestGenStrategyVisitor { private List fieldsToSelect; private List fieldsToInsert; + private List fieldsForDigestCalculation; private Dataset stagingDataset; private Dataset mainDataset; - public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert) + public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert, List fieldsForDigestCalculation) { this.mainDataset = mainDataset; this.stagingDataset = stagingDataset; this.fieldsToSelect = fieldsToSelect; this.fieldsToInsert = fieldsToInsert; + this.fieldsForDigestCalculation = fieldsForDigestCalculation; } @Override @@ 
-255,7 +298,7 @@ public Void visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udf .builder() .udfName(udfBasedDigestGenStrategy.digestUdfName()) .addAllFieldNames(stagingDataset.schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) - .addAllValues(fieldsToSelect) + .addAllValues(fieldsForDigestCalculation) .build(); String digestField = udfBasedDigestGenStrategy.digestField(); fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(digestField).build()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 3d40c64054d..c98e95cb519 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -19,6 +19,7 @@ import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -51,7 +52,8 @@ public class BulkLoadTest private static final String APPEND_TIME = "append_time"; private static final String DIGEST = "digest"; private static final String DIGEST_UDF = "LAKEHOUSE_MD5"; - private static final String LINEAGE = "lake_lineage"; + private static final String BATCH_ID = "batch_id"; + private static final String BATCH_ID_VALUE = "xyz123"; private static final String col_int = "col_int"; private static final String col_string = "col_string"; private static final String col_decimal = "col_decimal"; @@ -90,7 +92,8 @@ public class BulkLoadTest public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() { BulkLoad bulkLoad = BulkLoad.builder() - .generateDigest(false) + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -112,6 +115,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -121,15 +125,15 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + - "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`append_time` DATETIME)"; + "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` STRING,`append_time` 
DATETIME)"; String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + "FROM FILES (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + - "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -146,7 +150,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() { BulkLoad bulkLoad = BulkLoad.builder() - .generateDigest(false) + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -176,6 +181,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ 
-185,7 +191,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + - "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`append_time` DATETIME)"; + "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` STRING,`append_time` DATETIME)"; String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + @@ -193,8 +199,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() "(uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], max_bad_records=100, quote=''', skip_leading_rows=1, format='CSV', encoding='UTF8', compression='GZIP', field_delimiter=',', null_marker='NULL')"; String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + - "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, 
preActionsSql.get(0)); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index c55980bd0da..1244c4dddfc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -63,11 +63,12 @@ public class BulkLoadTest extends BaseTest private static final String APPEND_TIME = "append_time"; private static final String DIGEST = "digest"; private static final String DIGEST_UDF = "LAKEHOUSE_MD5"; + private static final String BATCH_ID = "batch_id"; + private static final String BATCH_ID_VALUE = "xyz123"; private static final String col_int = "col_int"; private static final String col_string = "col_string"; private static final String col_decimal = "col_decimal"; private static final String col_datetime = "col_datetime"; - private static final String BATCH_ID = "batch_id"; private static Field col1 = Field.builder() .name(col_int) @@ -96,7 +97,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception String filePath = "src/test/resources/data/bulk-load/input/staged_file1.csv"; BulkLoad bulkLoad = BulkLoad.builder() - .batchIdField("batch_id") + .batchIdField(BATCH_ID) .digestGenStrategy(NoDigestGenStrategy.builder().build()) 
.auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -122,7 +123,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue("xyz123") + .bulkLoadBatchIdValue(BATCH_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -171,7 +172,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception BulkLoad bulkLoad = BulkLoad.builder() .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(NoAuditing.builder().build()) - .batchIdField("batch_id") + .batchIdField(BATCH_ID) .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -195,7 +196,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue("xyz123") + .bulkLoadBatchIdValue(BATCH_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -244,7 +245,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception BulkLoad bulkLoad = BulkLoad.builder() .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(DIGEST_UDF).digestField(DIGEST).build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) - .batchIdField("batch_id") + .batchIdField(BATCH_ID) .build(); Dataset stagedFilesDataset = StagedFilesDataset.builder() @@ -267,7 +268,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .bulkLoadBatchIdValue("xyz123") + .bulkLoadBatchIdValue(BATCH_ID_VALUE) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -317,7 +318,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except 
String filePath = "src/test/resources/data/bulk-load/input/staged_file4.csv"; BulkLoad bulkLoad = BulkLoad.builder() - .batchIdField("batch_id") + .batchIdField(BATCH_ID) .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(DIGEST_UDF).digestField(DIGEST).build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -342,7 +343,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .bulkLoadBatchIdValue("xyz123") + .bulkLoadBatchIdValue(BATCH_ID_VALUE) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -392,6 +393,7 @@ public void testBulkLoadDigestColumnNotProvided() { BulkLoad bulkLoad = BulkLoad.builder() .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(DIGEST_UDF).build()) + .batchIdField(BATCH_ID) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); Assertions.fail("Exception was not thrown"); @@ -409,7 +411,7 @@ public void testBulkLoadDigestUDFNotProvided() { BulkLoad bulkLoad = BulkLoad.builder() .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField(DIGEST).build()) - .batchIdField("batch_id") + .batchIdField(BATCH_ID) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); Assertions.fail("Exception was not thrown"); @@ -426,7 +428,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() try { BulkLoad bulkLoad = BulkLoad.builder() - .batchIdField("batch_id") + .batchIdField(BATCH_ID) .digestGenStrategy(NoDigestGenStrategy.builder().build()) .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) .build(); @@ -444,7 +446,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) - .bulkLoadBatchIdValue("xyz123") + 
.bulkLoadBatchIdValue(BATCH_ID_VALUE) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -506,7 +508,7 @@ RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions o .executionTimestampClock(executionTimestampClock) .cleanupStagingData(options.cleanupStagingData()) .collectStatistics(options.collectStatistics()) - .bulkLoadBatchIdValue("xyz123") + .bulkLoadBatchIdValue(BATCH_ID_VALUE) .enableConcurrentSafety(true) .caseConversion(caseConversion) .build(); From 97a7eb073219ef159a574e36d29d2dc83720127a Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 20 Sep 2023 14:19:14 +0800 Subject: [PATCH 45/57] Add digest udf and more tests --- .../logicalplan/values/DigestUdfAbstract.java | 4 + .../logicalplan/values/FunctionName.java | 3 +- .../components/planner/BulkLoadPlanner.java | 26 +- .../relational/bigquery/BigQuerySink.java | 42 +++ .../sql/visitor/DigestUdfVisitor.java | 40 +++ .../components/ingestmode/BulkLoadTest.java | 252 ++++++++++++++++++ .../sqldom/common/FunctionName.java | 3 +- .../components/relational/h2/H2Sink.java | 1 + 8 files changed, 361 insertions(+), 10 deletions(-) create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java index 06f153770df..0dd7c1b56e2 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.logicalplan.values; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; + import java.util.List; @org.immutables.value.Value.Immutable @@ -32,4 +34,6 @@ public interface DigestUdfAbstract extends Value List fieldNames(); List values(); + + Dataset dataset(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java index 37e611e7ee9..706830f613f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/FunctionName.java @@ -37,5 +37,6 @@ public enum FunctionName GENERATE_ARRAY, PARSE_DATETIME, OBJECT_CONSTRUCT, - TO_VARIANT; + TO_VARIANT, + TO_JSON; } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 58c1c9bb6bb..f089e9d0e22 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -119,7 +119,7 @@ private LogicalPlan buildLogicalPlanForIngestUsingCopy(Resources resources) List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); // Add digest - ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert, fieldsToSelect)); + ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), fieldsToSelect, fieldsToInsert)); // Add batch_id field fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); @@ -152,7 +152,7 @@ private LogicalPlan buildLogicalPlanForIngestUsingCopyAndInsert(Resources resour List fieldsToInsertIntoMain = new ArrayList<>(tempDataset.schemaReference().fieldValues()); // Add digest - ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), fieldsToSelectFromTemp, fieldsToInsertIntoMain, fieldsToSelectFromStage)); + ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), 
stagingDataset(), tempDataset, fieldsToSelectFromTemp, fieldsToInsertIntoMain)); // Add batch_id field fieldsToInsertIntoMain.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); @@ -272,17 +272,26 @@ static class DigestGeneration implements DigestGenStrategyVisitor { private List fieldsToSelect; private List fieldsToInsert; - private List fieldsForDigestCalculation; private Dataset stagingDataset; private Dataset mainDataset; + private Optional tempDataset; - public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert, List fieldsForDigestCalculation) + public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert) { this.mainDataset = mainDataset; this.stagingDataset = stagingDataset; + this.tempDataset = Optional.empty(); + this.fieldsToSelect = fieldsToSelect; + this.fieldsToInsert = fieldsToInsert; + } + + public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, Dataset tempDataset, List fieldsToSelect, List fieldsToInsert) + { + this.mainDataset = mainDataset; + this.stagingDataset = stagingDataset; + this.tempDataset = Optional.of(tempDataset); this.fieldsToSelect = fieldsToSelect; this.fieldsToInsert = fieldsToInsert; - this.fieldsForDigestCalculation = fieldsForDigestCalculation; } @Override @@ -294,12 +303,13 @@ public Void visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStra @Override public Void visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) { - Value digestValue = DigestUdf + DigestUdf.Builder digestValueBuilder = DigestUdf .builder() .udfName(udfBasedDigestGenStrategy.digestUdfName()) .addAllFieldNames(stagingDataset.schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) - .addAllValues(fieldsForDigestCalculation) - .build(); + .addAllValues(fieldsToSelect); + 
tempDataset.ifPresent(digestValueBuilder::dataset); + Value digestValue = digestValueBuilder.build(); String digestField = udfBasedDigestGenStrategy.digestField(); fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(digestField).build()); fieldsToSelect.add(digestValue); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index 04401ba1973..bcb336aad64 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -14,6 +14,8 @@ package org.finos.legend.engine.persistence.components.relational.bigquery; +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; import org.finos.legend.engine.persistence.components.logicalplan.datasets.ClusterKey; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; @@ -32,6 +34,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import 
org.finos.legend.engine.persistence.components.logicalplan.values.DatetimeValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; import org.finos.legend.engine.persistence.components.logicalplan.values.StagedFilesFieldValue; import org.finos.legend.engine.persistence.components.optimizer.Optimizer; import org.finos.legend.engine.persistence.components.relational.CaseConversion; @@ -40,6 +43,8 @@ import org.finos.legend.engine.persistence.components.relational.ansi.AnsiSqlSink; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.LowerCaseOptimizer; import org.finos.legend.engine.persistence.components.relational.ansi.optimizer.UpperCaseOptimizer; +import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; +import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import org.finos.legend.engine.persistence.components.relational.bigquery.executor.BigQueryConnection; import org.finos.legend.engine.persistence.components.relational.bigquery.executor.BigQueryExecutor; @@ -53,6 +58,7 @@ import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.CopyVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.DatetimeValueVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.DeleteVisitor; +import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.DigestUdfVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.FieldVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.PartitionKeyVisitor; import org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor.SQLCreateVisitor; @@ -76,6 +82,8 @@ import java.util.Optional; import java.util.Set; 
+import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; + public class BigQuerySink extends AnsiSqlSink { private static final RelationalSink INSTANCE; @@ -112,6 +120,7 @@ public class BigQuerySink extends AnsiSqlSink logicalPlanVisitorByClass.put(StagedFilesDataset.class, new StagedFilesDatasetVisitor()); logicalPlanVisitorByClass.put(StagedFilesSelection.class, new StagedFilesSelectionVisitor()); logicalPlanVisitorByClass.put(StagedFilesDatasetReference.class, new StagedFilesDatasetReferenceVisitor()); + logicalPlanVisitorByClass.put(DigestUdf.class, new DigestUdfVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); Map> implicitDataTypeMapping = new HashMap<>(); @@ -244,4 +253,37 @@ public Field createNewField(Field evolveTo, Field evolveFrom, Optional .identity(evolveTo.identity()).unique(evolveTo.unique()) .defaultValue(evolveTo.defaultValue()).type(modifiedFieldType).build(); } + + @Override + public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) + { + executor.executePhysicalPlan(ingestSqlPlan, placeHolderKeyValues); + + Map stats = new HashMap<>(); + stats.put(StatisticName.FILES_LOADED, 0); // todo: check this + stats.put(StatisticName.ROWS_WITH_ERRORS, 0); // todo: check this + + SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); + if (rowsInsertedSqlPlan != null) + { + stats.put(StatisticName.ROWS_INSERTED, executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues) + .stream() + .findFirst() + .map(TabularData::getData) + .flatMap(t -> t.stream().findFirst()) + .map(Map::values) + .flatMap(t -> t.stream().findFirst()) + .orElseThrow(IllegalStateException::new)); + } + + IngestorResult result; + result = IngestorResult.builder() + .status(IngestStatus.SUCCEEDED) + .updatedDatasets(datasets) 
+ .putAllStatisticByName(stats) + .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) + .build(); + + return result; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java new file mode 100644 index 00000000000..f6719403e28 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java @@ -0,0 +1,40 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; + +import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; +import org.finos.legend.engine.persistence.components.logicalplan.values.ObjectValue; +import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Udf; +import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; +import org.finos.legend.engine.persistence.components.transformer.VisitorContext; + +import java.util.Arrays; + +public class DigestUdfVisitor implements LogicalPlanVisitor +{ + + @Override + public VisitorResult visit(PhysicalPlanNode prev, DigestUdf current, VisitorContext context) + { + Udf udf = new Udf(context.quoteIdentifier(), current.udfName()); + prev.push(udf); + + FunctionImpl function = FunctionImpl.builder().functionName(FunctionName.TO_JSON).addValue(ObjectValue.of(current.dataset().datasetReference().alias())).build(); + return new VisitorResult(udf, Arrays.asList(function)); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index c98e95cb519..7495fbed067 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -19,7 +19,9 @@ import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; +import org.finos.legend.engine.persistence.components.ingestmode.digest.UDFBasedDigestGenStrategy; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; @@ -27,6 +29,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.relational.CaseConversion; import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; @@ -212,4 +215,253 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() 
Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); } + + @Test + public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + .fileFormat(CsvFileFormat.builder().build()) + .addAllFiles(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + + "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` STRING)"; + + String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + + "(`col_int` INT64,`col_string` STRING,`col_decimal` 
NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + + "FROM FILES (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; + + String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`) " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,'xyz123' " + + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); + Assertions.assertEquals(expectedInsertSql, ingestSql.get(1)); + + Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); + Assertions.assertNull(statsSql.get(ROWS_INSERTED)); + } + + @Test + public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField(DIGEST).digestUdfName(DIGEST_UDF).build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + .fileFormat(CsvFileFormat.builder().build()) + .addAllFiles(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator 
generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + + "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`digest` STRING,`batch_id` STRING,`append_time` DATETIME)"; + + String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + + "FROM FILES (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; + + String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `digest`, `batch_id`, `append_time`) " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); + Assertions.assertEquals(expectedInsertSql, ingestSql.get(1)); + + Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as `rowsTerminated`", 
statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as `rowsUpdated`", statsSql.get(ROWS_UPDATED)); + Assertions.assertEquals("SELECT COUNT(*) as `rowsInserted` FROM `my_db`.`my_name` as my_alias WHERE my_alias.`append_time` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + } + + @Test + public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField(DIGEST).digestUdfName(DIGEST_UDF).build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + .fileFormat(CsvFileFormat.builder().build()) + .addAllFiles(filesList).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .caseConversion(CaseConversion.TO_UPPER) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + + List preActionsSql = operations.preActionsSql(); + List ingestSql = operations.ingestSql(); + Map statsSql = operations.postIngestStatisticsSql(); + + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `MY_DB`.`MY_NAME`" + + "(`COL_INT` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC(5,2),`COL_DATETIME` DATETIME,`COL_VARIANT` 
JSON,`DIGEST` STRING,`BATCH_ID` STRING,`APPEND_TIME` DATETIME)"; + + String expectedCopySql = "LOAD DATA OVERWRITE `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` " + + "(`COL_INT` INT64,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC(5,2),`COL_DATETIME` DATETIME,`COL_VARIANT` JSON) " + + "FROM FILES (uris=['/path/xyz/file1.csv','/path/xyz/file2.csv'], format='CSV')"; + + String expectedInsertSql = "INSERT INTO `MY_DB`.`MY_NAME` " + + "(`COL_INT`, `COL_STRING`, `COL_DECIMAL`, `COL_DATETIME`, `COL_VARIANT`, `DIGEST`, `BATCH_ID`, `APPEND_TIME`) " + + "(SELECT legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "FROM `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` as legend_persistence_temp)"; + + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); + Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); + Assertions.assertEquals(expectedInsertSql, ingestSql.get(1)); + + Assertions.assertEquals("SELECT 0 as `ROWSDELETED`", statsSql.get(ROWS_DELETED)); + Assertions.assertEquals("SELECT 0 as `ROWSTERMINATED`", statsSql.get(ROWS_TERMINATED)); + Assertions.assertEquals("SELECT 0 as `ROWSUPDATED`", statsSql.get(ROWS_UPDATED)); + Assertions.assertEquals("SELECT COUNT(*) as `ROWSINSERTED` FROM `MY_DB`.`MY_NAME` as my_alias WHERE my_alias.`APPEND_TIME` = PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00')", statsSql.get(ROWS_INSERTED)); + } + + @Test + public void testBulkLoadDigestColumnNotProvided() + { + try + { + BulkLoad bulkLoad = BulkLoad.builder() + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestUdfName(DIGEST_UDF).build()) + .batchIdField(BATCH_ID) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + Assertions.fail("Exception was not thrown"); + } + catch 
(Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Cannot build UDFBasedDigestGenStrategy, some of required attributes are not set [digestField]")); + } + } + + @Test + public void testBulkLoadDigestUDFNotProvided() + { + try + { + BulkLoad bulkLoad = BulkLoad.builder() + .digestGenStrategy(UDFBasedDigestGenStrategy.builder().digestField(DIGEST).build()) + .batchIdField(BATCH_ID) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Cannot build UDFBasedDigestGenStrategy, some of required attributes are not set [digestUdfName]")); + } + } + + @Test + public void testBulkLoadStagedFilesDatasetNotProvided() + { + try + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagingDataset = DatasetDefinition.builder() + .database("my_db").name("my_stage").alias("my_alias") + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2)).build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Only StagedFilesDataset are allowed under Bulk Load")); + } + } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java index 34482bded7c..6a345b82ebe 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/FunctionName.java @@ -45,7 +45,8 @@ public enum FunctionName PARSE_DATETIME("PARSE_DATETIME"), PARSE_JSON("PARSE_JSON"), TO_VARIANT("TO_VARIANT"), - OBJECT_CONSTRUCT("OBJECT_CONSTRUCT"); + OBJECT_CONSTRUCT("OBJECT_CONSTRUCT"), + TO_JSON("TO_JSON"); private static final Map BY_NAME = Arrays .stream(FunctionName.values()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 5548d89c50c..5d4f0ac1676 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -201,6 +201,7 @@ public Optional optimizerForCaseConversion(CaseConversion caseConvers } } + @Override public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { executor.executePhysicalPlan(ingestSqlPlan, placeHolderKeyValues); From 4e287942f69fe358a1708134f060ed9c2f0c6c0a Mon Sep 17 00:00:00 2001 From: kumuwu Date: Wed, 20 Sep 2023 14:33:55 +0800 Subject: [PATCH 46/57] Fix digest problem --- .../components/logicalplan/values/DigestUdfAbstract.java | 3 ++- .../components/relational/bigquery/BigQuerySink.java | 3 ++- .../relational/bigquery/sql/visitor/DigestUdfVisitor.java | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java index 0dd7c1b56e2..e6c35ff9c7b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/DigestUdfAbstract.java @@ -17,6 +17,7 @@ import 
org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import java.util.List; +import java.util.Optional; @org.immutables.value.Value.Immutable @org.immutables.value.Value.Style( @@ -35,5 +36,5 @@ public interface DigestUdfAbstract extends Value List values(); - Dataset dataset(); + Optional dataset(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index bcb336aad64..ef0eb7990ed 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -260,7 +260,8 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = new HashMap<>(); - stats.put(StatisticName.FILES_LOADED, 0); // todo: check this + StagedFilesDataset stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); + stats.put(StatisticName.FILES_LOADED, stagedFilesDataset.stagedFilesDatasetProperties().files().size()); // todo: check this stats.put(StatisticName.ROWS_WITH_ERRORS, 0); // todo: check this SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java index f6719403e28..c6b5faec90e 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/DigestUdfVisitor.java @@ -34,7 +34,7 @@ public VisitorResult visit(PhysicalPlanNode prev, DigestUdf current, VisitorCont Udf udf = new Udf(context.quoteIdentifier(), current.udfName()); prev.push(udf); - FunctionImpl function = FunctionImpl.builder().functionName(FunctionName.TO_JSON).addValue(ObjectValue.of(current.dataset().datasetReference().alias())).build(); + FunctionImpl function = FunctionImpl.builder().functionName(FunctionName.TO_JSON).addValue(ObjectValue.of(current.dataset().orElseThrow(IllegalStateException::new).datasetReference().alias())).build(); return new VisitorResult(udf, Arrays.asList(function)); } } From d804bd256ea8669479f993d674dd539a861019da Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 21 Sep 2023 14:46:46 +0800 Subject: [PATCH 47/57] Change H2 digest algo --- .../relational/h2/H2DigestUtil.java | 65 +++++++++++++++++-- .../components/H2DigestUtilTest.java | 32 +++++++++ 2 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/H2DigestUtilTest.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java index 82a7f3788f7..f2fc4ecb701 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java @@ -17,9 +17,19 @@ import org.apache.commons.codec.digest.DigestUtils; import org.finos.legend.engine.persistence.components.relational.jdbc.JdbcHelper; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + public class H2DigestUtil { + private static final byte[] EMPTY_STRING = new byte[] { 88 }; public static void registerMD5Udf(JdbcHelper sink, String UdfName) { sink.executeStatement("CREATE ALIAS " + UdfName + " FOR \"org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil.MD5\";"); @@ -27,9 +37,56 @@ public static void registerMD5Udf(JdbcHelper sink, String UdfName) public static String MD5(String[] columnNameList, String[] columnValueList) { - 
String columnNames = String.join("", columnNameList); - String columnValues = String.join("", columnValueList); - String columnNamesAndColumnValues = columnNames + columnValues; - return DigestUtils.md5Hex(columnNamesAndColumnValues).toUpperCase(); + return calculateMD5Digest(generateRowMap(columnNameList, columnValueList)); + } + + private static Map generateRowMap(String[] columnNameList, String[] columnValueList) + { + Map map = new HashMap<>(); + for (int i = 0; i < columnNameList.length; i++) + { + map.put(columnNameList[i], columnValueList[i]); + } + return map; + } + + public static String calculateMD5Digest(Map row) + { + List fieldNames = row.keySet().stream().sorted().collect(Collectors.toList()); + try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream)) + { + fieldNames.stream().forEachOrdered(field -> + { + Optional value = Optional.ofNullable(row.get(field)); + value.ifPresent(v -> writeValueWithFieldName(field, v, dataOutputStream)); + }); + dataOutputStream.flush(); + return DigestUtils.md5Hex(byteArrayOutputStream.toByteArray()); + } + catch (IOException e) + { + throw new RuntimeException("Unable to create digest", e); + } + } + + private static void writeValueWithFieldName(String fieldName, Object value, DataOutputStream dataOutputStream) + { + try { + dataOutputStream.writeInt(fieldName.hashCode()); + String stringValue = value.toString(); + if (stringValue == null || stringValue.length() == 0) + { + dataOutputStream.write(EMPTY_STRING); + } + else + { + dataOutputStream.writeBytes(stringValue); + } + } + catch (IOException e) + { + throw new RuntimeException(String.format("Unable to create digest for field [%s]", fieldName), e); + } } } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/H2DigestUtilTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/H2DigestUtilTest.java new file mode 100644 index 00000000000..0a0b29ea9ea --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/H2DigestUtilTest.java @@ -0,0 +1,32 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components; + +import org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class H2DigestUtilTest +{ + private String expectedDigest = "fd40b241c6d2eb55348e3bc51e81925b"; + private String[] columns = new String[]{"COLUMN_1", "COLUMN_2", "COLUMN_3", "COLUMN_4", "COLUMN_5", "COLUMN_6"}; + private String[] values = new String[]{"test data", "true", "33", "1111", "1.5", null}; + + @Test + void testMD5() + { + Assertions.assertEquals(expectedDigest, H2DigestUtil.MD5(columns, values)); + } +} From 106de039d2543e38a4ab5b9eb1632d310205ede0 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 21 Sep 2023 14:56:53 +0800 Subject: [PATCH 48/57] Fix tests --- .../persistence/components/relational/h2/H2DigestUtil.java | 4 +++- .../resources/data/bulk-load/expected/expected_table3.csv | 6 +++--- .../resources/data/bulk-load/expected/expected_table4.csv | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java index f2fc4ecb701..648e01bec26 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java @@ -30,6 
+30,7 @@ public class H2DigestUtil { private static final byte[] EMPTY_STRING = new byte[] { 88 }; + public static void registerMD5Udf(JdbcHelper sink, String UdfName) { sink.executeStatement("CREATE ALIAS " + UdfName + " FOR \"org.finos.legend.engine.persistence.components.relational.h2.H2DigestUtil.MD5\";"); @@ -72,7 +73,8 @@ public static String calculateMD5Digest(Map row) private static void writeValueWithFieldName(String fieldName, Object value, DataOutputStream dataOutputStream) { - try { + try + { dataOutputStream.writeInt(fieldName.hashCode()); String stringValue = value.toString(); if (stringValue == null || stringValue.length() == 0) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv index 8fc9ed0670f..c6774c43774 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,6366D6AFD9E8B991393E719A5A4E6D35,xyz123,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,C556B5DC2B9F3A66000202DF9D98EC05,xyz123,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0,051D68CF86951CDE0DF875915940AEC6,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,9fc62c73317227ab0760aed72f4fee17,xyz123,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,b0383f1a479eb2a6c5186f045af4c51f,xyz123,2000-01-01 00:00:00.0 
+49,Sandy,123.45,2022-01-13 00:00:00.0,dc170980c8540e2a667753e793dad94c,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv index 074bc2e251d..7888259500d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,4B39799C7A1FB5EFC4BC328966A159E0,xyz123,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,58467B440BCED7607369DC8A260B0607,xyz123,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0,29B8C8A6CD28B069290372E6B54B6C72,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,e7dc92b208f2244b9ece45d706474f55,xyz123,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,278cf3ee2c2981bb8aeade81cc21e87a,xyz123,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,e8ff35a6699515eaca0a798a7f989978,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file From 2e630dfc3c8372189cb38da81f4a9229501ba39f Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 21 Sep 2023 15:03:00 +0800 Subject: [PATCH 49/57] Fix typo --- .../persistence/components/relational/h2/H2DigestUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java index 648e01bec26..3a6c1db5e95 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2DigestUtil.java @@ -51,7 +51,7 @@ private static Map generateRowMap(String[] columnNameList, Strin return map; } - public static String calculateMD5Digest(Map row) + private static String calculateMD5Digest(Map row) { List fieldNames = row.keySet().stream().sorted().collect(Collectors.toList()); try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); From 31b21991c2b8a932526b5ae02b5cf05fd2223ee0 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 26 Sep 2023 16:33:17 +0800 Subject: [PATCH 50/57] Refactor file format and load options --- .../common/AvroFileFormatAbstract.java | 40 ----------- .../components/common/FileFormat.java | 19 ++++- .../components/common/FileFormatVisitor.java | 26 ------- .../common/JsonFileFormatAbstract.java | 46 ------------- ...Abstract.java => LoadOptionsAbstract.java} | 14 +--- .../common/ParquetFileFormatAbstract.java | 40 ----------- ...yStagedFilesDatasetPropertiesAbstract.java | 5 ++ .../StagedFilesDatasetReferenceVisitor.java | 69 +++++++------------ .../components/ingestmode/BulkLoadTest.java | 14 ++-- 
...2StagedFilesDatasetPropertiesAbstract.java | 3 +- .../ingestmode/bulkload/BulkLoadTest.java | 15 ++-- 11 files changed, 61 insertions(+), 230 deletions(-) delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/{CsvFileFormatAbstract.java => LoadOptionsAbstract.java} (80%) delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java deleted file mode 100644 index 647d5ea9152..00000000000 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/AvroFileFormatAbstract.java +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.finos.legend.engine.persistence.components.common; - -import org.immutables.value.Value; - -@Value.Immutable -@Value.Style( - typeAbstract = "*Abstract", - typeImmutable = "*", - jdkOnly = true, - optionalAcceptNullable = true, - strictBuilder = true -) -public interface AvroFileFormatAbstract extends FileFormat -{ - @Override - default String getFormatName() - { - return "AVRO"; - } - - @Override - default T accept(FileFormatVisitor visitor) - { - return visitor.visitAvroFileFormat(this); - } -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java index 3214bc18ed5..3d5f556970a 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java @@ -14,9 +14,22 @@ package org.finos.legend.engine.persistence.components.common; -public interface FileFormat +public enum FileFormat { - String getFormatName(); + CSV("CSV"), + JSON("JSON"), + AVRO("AVRO"), + PARQUET("PARQUET"); - T accept(FileFormatVisitor visitor); + String name; + + FileFormat(String name) + { + this.name = name; + } + + public String getName() + { + return this.name; + } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java deleted file mode 100644 index 78a755be835..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormatVisitor.java +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.finos.legend.engine.persistence.components.common; - -public interface FileFormatVisitor -{ - T visitCsvFileFormat(CsvFileFormatAbstract csvFileFormat); - - T visitJsonFileFormat(JsonFileFormatAbstract jsonFileFormat); - - T visitAvroFileFormat(AvroFileFormatAbstract avroFileFormat); - - T visitParquetFileFormat(ParquetFileFormatAbstract parquetFileFormat); -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java deleted file mode 100644 index 590cbcc3f6c..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/JsonFileFormatAbstract.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.finos.legend.engine.persistence.components.common; - -import org.immutables.value.Value; - -import java.util.Optional; - -@Value.Immutable -@Value.Style( - typeAbstract = "*Abstract", - typeImmutable = "*", - jdkOnly = true, - optionalAcceptNullable = true, - strictBuilder = true -) -public interface JsonFileFormatAbstract extends FileFormat -{ - Optional maxBadRecords(); - - Optional compression(); - - @Override - default String getFormatName() - { - return "JSON"; - } - - @Override - default T accept(FileFormatVisitor visitor) - { - return visitor.visitJsonFileFormat(this); - } -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/LoadOptionsAbstract.java similarity index 80% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/LoadOptionsAbstract.java index d9f6e7e4138..c299b0b7aa5 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/CsvFileFormatAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/LoadOptionsAbstract.java @@ -26,7 +26,7 @@ optionalAcceptNullable = true, strictBuilder = true ) -public interface CsvFileFormatAbstract extends FileFormat +public interface LoadOptionsAbstract { Optional fieldDelimiter(); @@ -41,16 +41,4 @@ public interface CsvFileFormatAbstract extends FileFormat Optional maxBadRecords(); Optional compression(); - - @Override - default String getFormatName() - { - return "CSV"; - } - - @Override - default T accept(FileFormatVisitor visitor) - { - return visitor.visitCsvFileFormat(this); - } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java deleted file mode 100644 index c74d0eaad37..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/ParquetFileFormatAbstract.java +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.finos.legend.engine.persistence.components.common; - -import org.immutables.value.Value; - -@Value.Immutable -@Value.Style( - typeAbstract = "*Abstract", - typeImmutable = "*", - jdkOnly = true, - optionalAcceptNullable = true, - strictBuilder = true -) -public interface ParquetFileFormatAbstract extends FileFormat -{ - @Override - default String getFormatName() - { - return "PARQUET"; - } - - @Override - default T accept(FileFormatVisitor visitor) - { - return visitor.visitParquetFileFormat(this); - } -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java index 70e5a26c1ba..088fed20cb4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/logicalplan/datasets/BigQueryStagedFilesDatasetPropertiesAbstract.java @@ -16,9 +16,12 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets; import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.LoadOptions; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; +import java.util.Optional; + @Value.Immutable @Value.Style( @@ -31,4 +34,6 @@ public interface BigQueryStagedFilesDatasetPropertiesAbstract extends StagedFilesDatasetProperties { FileFormat fileFormat(); + + Optional loadOptions(); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java index c24c3bb7128..374f55fa4d7 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -14,12 +14,8 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; -import org.finos.legend.engine.persistence.components.common.AvroFileFormatAbstract; -import org.finos.legend.engine.persistence.components.common.CsvFileFormatAbstract; import org.finos.legend.engine.persistence.components.common.FileFormat; -import org.finos.legend.engine.persistence.components.common.FileFormatVisitor; -import org.finos.legend.engine.persistence.components.common.JsonFileFormatAbstract; -import org.finos.legend.engine.persistence.components.common.ParquetFileFormatAbstract; +import org.finos.legend.engine.persistence.components.common.LoadOptions; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; @@ -43,55 +39,36 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu Map loadOptionsMap = new HashMap<>(); FileFormat fileFormat = datasetProperties.fileFormat(); - loadOptionsMap.put("format", fileFormat.getFormatName()); - fileFormat.accept(new RetrieveLoadOptions(loadOptionsMap)); + loadOptionsMap.put("format", fileFormat.getName()); + datasetProperties.loadOptions().ifPresent(options -> retrieveLoadOptions(fileFormat, options, loadOptionsMap)); prev.push(loadOptionsMap); - prev.push(datasetProperties.files()); return new VisitorResult(null); } - private static class RetrieveLoadOptions implements FileFormatVisitor + private void retrieveLoadOptions(FileFormat fileFormat, LoadOptions 
loadOptions, Map loadOptionsMap) { - private Map loadOptionsMap; - - RetrieveLoadOptions(Map loadOptionsMap) - { - this.loadOptionsMap = loadOptionsMap; - } - - @Override - public Void visitCsvFileFormat(CsvFileFormatAbstract csvFileFormat) - { - csvFileFormat.fieldDelimiter().ifPresent(property -> loadOptionsMap.put("field_delimiter", property)); - csvFileFormat.encoding().ifPresent(property -> loadOptionsMap.put("encoding", property)); - csvFileFormat.nullMarker().ifPresent(property -> loadOptionsMap.put("null_marker", property)); - csvFileFormat.quote().ifPresent(property -> loadOptionsMap.put("quote", property)); - csvFileFormat.skipLeadingRows().ifPresent(property -> loadOptionsMap.put("skip_leading_rows", property)); - csvFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); - csvFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); - return null; - } - - @Override - public Void visitJsonFileFormat(JsonFileFormatAbstract jsonFileFormat) - { - jsonFileFormat.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); - jsonFileFormat.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); - return null; - } - - @Override - public Void visitAvroFileFormat(AvroFileFormatAbstract avroFileFormat) - { - return null; - } - - @Override - public Void visitParquetFileFormat(ParquetFileFormatAbstract parquetFileFormat) + switch (fileFormat) { - return null; + case CSV: + loadOptions.fieldDelimiter().ifPresent(property -> loadOptionsMap.put("field_delimiter", property)); + loadOptions.encoding().ifPresent(property -> loadOptionsMap.put("encoding", property)); + loadOptions.nullMarker().ifPresent(property -> loadOptionsMap.put("null_marker", property)); + loadOptions.quote().ifPresent(property -> loadOptionsMap.put("quote", property)); + loadOptions.skipLeadingRows().ifPresent(property -> loadOptionsMap.put("skip_leading_rows", 
property)); + loadOptions.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); + loadOptions.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); + break; + case JSON: + loadOptions.maxBadRecords().ifPresent(property -> loadOptionsMap.put("max_bad_records", property)); + loadOptions.compression().ifPresent(property -> loadOptionsMap.put("compression", property)); + break; + case AVRO: + case PARQUET: + return; + default: + throw new IllegalStateException("Unrecognized file format: " + fileFormat); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 7495fbed067..5cef940d852 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -14,8 +14,9 @@ package org.finos.legend.engine.persistence.components.ingestmode; -import org.finos.legend.engine.persistence.components.common.CsvFileFormat; import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.LoadOptions; import org.finos.legend.engine.persistence.components.common.StatisticName; import 
org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; @@ -103,7 +104,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -161,7 +162,8 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder() + .fileFormat(FileFormat.CSV) + .loadOptions(LoadOptions.builder() .encoding("UTF8") .maxBadRecords(100L) .nullMarker("NULL") @@ -228,7 +230,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -286,7 +288,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); @@ -344,7 +346,7 @@ public void 
testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(filesList).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, col5)).build()) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java index 098b454df28..ea69a121e66 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/logicalplan/datasets/H2StagedFilesDatasetPropertiesAbstract.java @@ -15,7 +15,6 @@ package org.finos.legend.engine.persistence.components.relational.h2.logicalplan.datasets; -import org.finos.legend.engine.persistence.components.common.CsvFileFormat; import org.finos.legend.engine.persistence.components.common.FileFormat; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; import org.immutables.value.Value; @@ -40,7 +39,7 @@ default void validate() { throw new 
IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only 1 file per load supported"); } - if (!(fileFormat() instanceof CsvFileFormat)) + if (fileFormat() != FileFormat.CSV) { throw new IllegalArgumentException("Cannot build H2StagedFilesDatasetProperties, only CSV file loading supported"); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index 1244c4dddfc..a4da48a42c6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -15,9 +15,8 @@ package org.finos.legend.engine.persistence.components.ingestmode.bulkload; import org.finos.legend.engine.persistence.components.BaseTest; -import org.finos.legend.engine.persistence.components.common.CsvFileFormat; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.common.JsonFileFormat; +import org.finos.legend.engine.persistence.components.common.FileFormat; import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; @@ -105,7 +104,7 @@ public void 
testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -178,7 +177,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -251,7 +250,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -326,7 +325,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(Collections.singletonList(filePath)).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -468,7 +467,7 @@ public void testBulkLoadMoreThanOneFile() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( 
H2StagedFilesDatasetProperties.builder() - .fileFormat(CsvFileFormat.builder().build()) + .fileFormat(FileFormat.CSV) .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.csv", "src/test/resources/data/bulk-load/input/staged_file2.csv")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -488,7 +487,7 @@ public void testBulkLoadNotCsvFile() Dataset stagedFilesDataset = StagedFilesDataset.builder() .stagedFilesDatasetProperties( H2StagedFilesDatasetProperties.builder() - .fileFormat(JsonFileFormat.builder().build()) + .fileFormat(FileFormat.JSON) .addAllFiles(Arrays.asList("src/test/resources/data/bulk-load/input/staged_file1.json")).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); From b948e02cf45b282498579aa33562ec7d60bc6b84 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 26 Sep 2023 17:29:38 +0800 Subject: [PATCH 51/57] Refactor dataset, selection, reference logic --- .../components/planner/BulkLoadPlanner.java | 3 +- .../bigquery/sql/visitor/CopyVisitor.java | 1 + .../StagedFilesDatasetReferenceVisitor.java | 6 ++- .../visitor/StagedFilesDatasetVisitor.java | 14 +------ .../visitor/StagedFilesSelectionVisitor.java | 17 ++------ .../table/StagedFilesTable.java} | 40 ++++--------------- .../schemaops/statements/CopyStatement.java | 34 +++++++++++++--- .../relational/sqldom/common/Clause.java | 2 +- 8 files changed, 48 insertions(+), 69 deletions(-) rename legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/{statements/SelectFromFileStatement.java => expressions/table/StagedFilesTable.java} (75%) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index f089e9d0e22..ca62f9b7810 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -142,9 +142,8 @@ private LogicalPlan buildLogicalPlanForIngestUsingCopyAndInsert(Resources resour // Operation 1: Copy into a temp table List fieldsToSelectFromStage = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); - List fieldsToInsertIntoTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); Dataset selectStage = StagedFilesSelection.builder().source(stagedFilesDataset).addAllFields(fieldsToSelectFromStage).build(); - operations.add(Copy.of(tempDataset, selectStage, fieldsToInsertIntoTemp)); + operations.add(Copy.of(tempDataset, selectStage, fieldsToSelectFromStage)); // Operation 2: Transfer from temp table into target table, adding extra columns at the same time diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java index 502c92e22f0..b383f1f6b80 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/CopyVisitor.java @@ -36,6 +36,7 @@ public VisitorResult visit(PhysicalPlanNode prev, Copy current, VisitorContext c List logicalPlanNodes = new ArrayList<>(); logicalPlanNodes.add(current.sourceDataset()); logicalPlanNodes.add(current.targetDataset()); + logicalPlanNodes.addAll(current.fields()); return new VisitorResult(copyStatement, logicalPlanNodes); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java index 374f55fa4d7..82f7045a630 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -19,6 +19,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.expressions.table.StagedFilesTable; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; @@ -41,8 +42,9 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu FileFormat fileFormat = datasetProperties.fileFormat(); loadOptionsMap.put("format", fileFormat.getName()); datasetProperties.loadOptions().ifPresent(options -> retrieveLoadOptions(fileFormat, options, loadOptionsMap)); - prev.push(loadOptionsMap); - prev.push(datasetProperties.files()); + + StagedFilesTable stagedFilesTable = new StagedFilesTable(datasetProperties.files(), loadOptionsMap); + prev.push(stagedFilesTable); return new VisitorResult(null); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java index 
a709ad514fe..b482a7e6e3b 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetVisitor.java @@ -15,26 +15,16 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; -import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; -import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; -import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; - -import java.util.List; public class StagedFilesDatasetVisitor implements LogicalPlanVisitor { @Override public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDataset current, VisitorContext context) { - List allColumns = LogicalPlanUtils.extractStagedFilesFieldValues(current); - StagedFilesSelection selection = StagedFilesSelection.builder() - .source(current) - .addAllFields(allColumns) - .alias(current.datasetReference().alias()) - .build(); - return new StagedFilesSelectionVisitor().visit(prev, selection, context); + return new StagedFilesDatasetReferenceVisitor().visit(prev, (StagedFilesDatasetReference) 
current.datasetReference(), context); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java index 0e57ce3c7f5..c25d175be4d 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesSelectionVisitor.java @@ -14,29 +14,20 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.sql.visitor; -import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanNode; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; -import org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements.SelectFromFileStatement; import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; import org.finos.legend.engine.persistence.components.transformer.VisitorContext; 
-import java.util.ArrayList; -import java.util.List; - public class StagedFilesSelectionVisitor implements LogicalPlanVisitor { @Override public VisitorResult visit(PhysicalPlanNode prev, StagedFilesSelection current, VisitorContext context) { - SelectFromFileStatement selectFromFileStatement = new SelectFromFileStatement(); - prev.push(selectFromFileStatement); - - List logicalPlanNodeList = new ArrayList<>(); - logicalPlanNodeList.add(current.source().datasetReference()); - logicalPlanNodeList.addAll(current.fields()); - - return new VisitorResult(selectFromFileStatement, logicalPlanNodeList); + StagedFilesDataset stagedFilesDataset = current.source(); + return new StagedFilesDatasetReferenceVisitor().visit(prev, (StagedFilesDatasetReference) stagedFilesDataset.datasetReference(), context); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/expressions/table/StagedFilesTable.java similarity index 75% rename from legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java rename to legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/expressions/table/StagedFilesTable.java index 6292ebdf73f..2a8d288eeb4 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/SelectFromFileStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/expressions/table/StagedFilesTable.java @@ -12,16 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements; +package org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.expressions.table; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; -import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.select.SelectExpression; -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.TableLike; import org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils; -import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -29,38 +26,27 @@ import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_SQUARE_BRACKET; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.COMMA; 
-import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.EMPTY; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_SQUARE_BRACKET; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; -public class SelectFromFileStatement extends SelectExpression +public class StagedFilesTable extends TableLike { - private final List columns; private List files; private Map loadOptions; - public SelectFromFileStatement() + public StagedFilesTable(List files, Map loadOptions) { - columns = new ArrayList<>(); + this.files = files; + this.loadOptions = loadOptions; } - /* - Select from file GENERIC PLAN for Big Query: - (COLUMN_LIST) - FROM FILES (LOAD_OPTIONS) - */ @Override public void genSql(StringBuilder builder) throws SqlDomException { validate(); - builder.append(OPEN_PARENTHESIS); - SqlGen.genSqlList(builder, columns, EMPTY, COMMA); - builder.append(CLOSING_PARENTHESIS); - - builder.append(WHITE_SPACE); - builder.append(Clause.FROM_FILES.get()); + builder.append(Clause.FILES.get()); builder.append(WHITE_SPACE); builder.append(OPEN_PARENTHESIS); @@ -109,18 +95,6 @@ public void genSql(StringBuilder builder) throws SqlDomException @Override public void push(Object node) { - if (node instanceof Value) - { - columns.add((Value) node); - } - if (node instanceof Map) - { - loadOptions = (Map) node; - } - if (node instanceof List) - { - files = (List) node; - } } void validate() throws SqlDomException diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java index e447bfac457..b693010cb79 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java @@ -14,21 +14,34 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.statements; +import org.finos.legend.engine.persistence.components.relational.bigquery.sqldom.schemaops.expressions.table.StagedFilesTable; import org.finos.legend.engine.persistence.components.relational.sqldom.SqlDomException; +import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; +import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DMLStatement; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; + +import java.util.ArrayList; +import java.util.List; import static org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause.LOAD_DATA; import static org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause.OVERWRITE; +import static 
org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.CLOSING_PARENTHESIS; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.COMMA; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.EMPTY; +import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; public class CopyStatement implements DMLStatement { private Table table; - private SelectFromFileStatement selectFromFileStatement; + private StagedFilesTable stagedFilesTable; + private List columns; public CopyStatement() { + columns = new ArrayList<>(); } /* @@ -48,7 +61,12 @@ public void genSql(StringBuilder builder) throws SqlDomException table.genSqlWithoutAlias(builder); builder.append(WHITE_SPACE); - selectFromFileStatement.genSql(builder); + builder.append(OPEN_PARENTHESIS); + SqlGen.genSqlList(builder, columns, EMPTY, COMMA); + builder.append(CLOSING_PARENTHESIS); + + builder.append(WHITE_SPACE + Clause.FROM.get() + WHITE_SPACE); + stagedFilesTable.genSql(builder); } @Override @@ -58,17 +76,21 @@ public void push(Object node) { table = (Table) node; } - else if (node instanceof SelectFromFileStatement) + else if (node instanceof StagedFilesTable) + { + stagedFilesTable = (StagedFilesTable) node; + } + else if (node instanceof Value) { - selectFromFileStatement = (SelectFromFileStatement) node; + columns.add((Value) node); } } void validate() throws SqlDomException { - if (selectFromFileStatement == null) + if (stagedFilesTable == null) { - throw new SqlDomException("selectFromFileStatement is mandatory for Copy Table Command"); + throw new SqlDomException("stagedFilesTable is mandatory for Copy Table Command"); } if (table == null) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java index 46c2e5c3574..a68b724f02f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/sqldom/common/Clause.java @@ -61,7 +61,7 @@ public enum Clause ARRAY("ARRAY"), LOAD_DATA("LOAD DATA"), OVERWRITE("OVERWRITE"), - FROM_FILES("FROM FILES"); + FILES("FILES"); private final String clause; From 6611f9065bc51f7d877c1a8d83f81ddcc124a8c4 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 26 Sep 2023 17:42:52 +0800 Subject: [PATCH 52/57] Fix other comments --- .../components/planner/BulkLoadPlanner.java | 48 +++++++------------ .../components/util/Capability.java | 2 +- .../components/relational/h2/H2Sink.java | 2 +- .../relational/snowflake/SnowflakeSink.java | 2 +- 4 files changed, 21 insertions(+), 33 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 
ca62f9b7810..a89ee2dfa25 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -42,7 +42,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.operations.Create; import org.finos.legend.engine.persistence.components.logicalplan.operations.Copy; import org.finos.legend.engine.persistence.components.logicalplan.operations.Operation; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.values.DigestUdf; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; @@ -63,7 +62,7 @@ class BulkLoadPlanner extends Planner { - private boolean allowExtraFieldsWhileCopying; + private boolean transformWhileCopy; private Dataset tempDataset; private StagedFilesDataset stagedFilesDataset; private BulkLoadMetadataDataset bulkLoadMetadataDataset; @@ -81,16 +80,16 @@ class BulkLoadPlanner extends Planner stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); bulkLoadMetadataDataset = bulkLoadMetadataDataset().orElseThrow(IllegalStateException::new); - allowExtraFieldsWhileCopying = capabilities.contains(Capability.ALLOW_EXTRA_FIELDS_WHILE_COPYING); - if (!allowExtraFieldsWhileCopying) + transformWhileCopy = capabilities.contains(Capability.TRANSFORM_WHILE_COPY); + if (!transformWhileCopy) { - tempDataset = datasets.tempDataset().orElse(DatasetDefinition.builder() + tempDataset = DatasetDefinition.builder() .schema(datasets.stagingDataset().schema()) 
.database(datasets.mainDataset().datasetReference().database()) .group(datasets.mainDataset().datasetReference().group()) .name(datasets.mainDataset().datasetReference().name().orElseThrow((IllegalStateException::new)) + UNDERSCORE + TEMP_DATASET_BASE_NAME) .alias(TEMP_DATASET_BASE_NAME) - .build()); + .build(); } } @@ -103,17 +102,17 @@ protected BulkLoad ingestMode() @Override public LogicalPlan buildLogicalPlanForIngest(Resources resources) { - if (allowExtraFieldsWhileCopying) + if (transformWhileCopy) { - return buildLogicalPlanForIngestUsingCopy(resources); + return buildLogicalPlanForTransformWhileCopy(resources); } else { - return buildLogicalPlanForIngestUsingCopyAndInsert(resources); + return buildLogicalPlanForCopyAndTransform(resources); } } - private LogicalPlan buildLogicalPlanForIngestUsingCopy(Resources resources) + private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) { List fieldsToSelect = LogicalPlanUtils.extractStagedFilesFieldValues(stagingDataset()); List fieldsToInsert = new ArrayList<>(stagingDataset().schemaReference().fieldValues()); @@ -135,7 +134,7 @@ private LogicalPlan buildLogicalPlanForIngestUsingCopy(Resources resources) return LogicalPlan.of(Collections.singletonList(Copy.of(mainDataset(), selectStage, fieldsToInsert))); } - private LogicalPlan buildLogicalPlanForIngestUsingCopyAndInsert(Resources resources) + private LogicalPlan buildLogicalPlanForCopyAndTransform(Resources resources) { List operations = new ArrayList<>(); @@ -151,7 +150,7 @@ private LogicalPlan buildLogicalPlanForIngestUsingCopyAndInsert(Resources resour List fieldsToInsertIntoMain = new ArrayList<>(tempDataset.schemaReference().fieldValues()); // Add digest - ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), stagingDataset(), tempDataset, fieldsToSelectFromTemp, fieldsToInsertIntoMain)); + ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), tempDataset, fieldsToSelectFromTemp, 
fieldsToInsertIntoMain)); // Add batch_id field fieldsToInsertIntoMain.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); @@ -183,7 +182,7 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) List operations = new ArrayList<>(); operations.add(Create.of(true, mainDataset())); operations.add(Create.of(true, bulkLoadMetadataDataset.get())); - if (!allowExtraFieldsWhileCopying) + if (!transformWhileCopy) { operations.add(Create.of(true, tempDataset)); } @@ -194,7 +193,7 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) public LogicalPlan buildLogicalPlanForPostActions(Resources resources) { List operations = new ArrayList<>(); - if (!allowExtraFieldsWhileCopying) + if (!transformWhileCopy) { operations.add(Delete.builder().dataset(tempDataset).build()); } @@ -205,7 +204,7 @@ public LogicalPlan buildLogicalPlanForPostActions(Resources resources) public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) { List operations = new ArrayList<>(); - if (!allowExtraFieldsWhileCopying) + if (!transformWhileCopy) { operations.add(Drop.of(true, tempDataset, true)); } @@ -273,22 +272,11 @@ static class DigestGeneration implements DigestGenStrategyVisitor private List fieldsToInsert; private Dataset stagingDataset; private Dataset mainDataset; - private Optional tempDataset; public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, List fieldsToSelect, List fieldsToInsert) { this.mainDataset = mainDataset; this.stagingDataset = stagingDataset; - this.tempDataset = Optional.empty(); - this.fieldsToSelect = fieldsToSelect; - this.fieldsToInsert = fieldsToInsert; - } - - public DigestGeneration(Dataset mainDataset, Dataset stagingDataset, Dataset tempDataset, List fieldsToSelect, List fieldsToInsert) - { - this.mainDataset = mainDataset; - this.stagingDataset = stagingDataset; - this.tempDataset = Optional.of(tempDataset); this.fieldsToSelect = 
fieldsToSelect; this.fieldsToInsert = fieldsToInsert; } @@ -302,13 +290,13 @@ public Void visitNoDigestGenStrategy(NoDigestGenStrategyAbstract noDigestGenStra @Override public Void visitUDFBasedDigestGenStrategy(UDFBasedDigestGenStrategyAbstract udfBasedDigestGenStrategy) { - DigestUdf.Builder digestValueBuilder = DigestUdf + Value digestValue = DigestUdf .builder() .udfName(udfBasedDigestGenStrategy.digestUdfName()) .addAllFieldNames(stagingDataset.schemaReference().fieldValues().stream().map(fieldValue -> fieldValue.fieldName()).collect(Collectors.toList())) - .addAllValues(fieldsToSelect); - tempDataset.ifPresent(digestValueBuilder::dataset); - Value digestValue = digestValueBuilder.build(); + .addAllValues(fieldsToSelect) + .dataset(stagingDataset) + .build(); String digestField = udfBasedDigestGenStrategy.digestField(); fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset.datasetReference()).fieldName(digestField).build()); fieldsToSelect.add(digestValue); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java index 409c73a2922..f99f9f94a23 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/Capability.java @@ -22,5 +22,5 @@ public enum Capability EXPLICIT_DATA_TYPE_CONVERSION, DATA_TYPE_LENGTH_CHANGE, DATA_TYPE_SCALE_CHANGE, - 
ALLOW_EXTRA_FIELDS_WHILE_COPYING; + TRANSFORM_WHILE_COPY; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 5d4f0ac1676..528fb07dec6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -97,7 +97,7 @@ public class H2Sink extends AnsiSqlSink capabilities.add(Capability.EXPLICIT_DATA_TYPE_CONVERSION); capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); capabilities.add(Capability.DATA_TYPE_SCALE_CHANGE); - capabilities.add(Capability.ALLOW_EXTRA_FIELDS_WHILE_COPYING); + capabilities.add(Capability.TRANSFORM_WHILE_COPY); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index a1061edce23..4ac0d7ab354 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -109,7 +109,7 @@ public class SnowflakeSink extends AnsiSqlSink capabilities.add(Capability.ADD_COLUMN); capabilities.add(Capability.IMPLICIT_DATA_TYPE_CONVERSION); capabilities.add(Capability.DATA_TYPE_LENGTH_CHANGE); - capabilities.add(Capability.ALLOW_EXTRA_FIELDS_WHILE_COPYING); + capabilities.add(Capability.TRANSFORM_WHILE_COPY); CAPABILITIES = Collections.unmodifiableSet(capabilities); Map, LogicalPlanVisitor> logicalPlanVisitorByClass = new HashMap<>(); From 33f9e883d6c3a324d7c52aa197da486d9c7307ad Mon Sep 17 00:00:00 2001 From: kumuwu Date: Thu, 28 Sep 2023 17:46:07 +0800 Subject: [PATCH 53/57] Fix big query bulk load ingestor flow and add end-to-end tests --- .../components/planner/BulkLoadPlanner.java | 2 +- .../relational/bigquery/BigQuerySink.java | 21 +-- .../bigquery/executor/BigQueryExecutor.java | 17 +++ .../bigquery/executor/BigQueryHelper.java | 29 ++++ .../executor/BigQueryTransactionManager.java | 15 +- .../schemaops/statements/CopyStatement.java | 4 +- .../components/e2e/BulkLoadExecutorTest.java | 130 +++++++++++++++++ .../components/e2e/BulkLoadGeneratorTest.java | 138 ++++++++++++++++++ .../expected/bulk_load/expected_table1.csv | 7 + .../input/bulk_load/staged_file1.csv | 3 + .../input/bulk_load/staged_file2.csv | 3 + .../input/bulk_load/staged_file3.csv | 1 + 12 files changed, 347 insertions(+), 23 deletions(-) create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file1.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file3.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index a89ee2dfa25..6d2c38027c3 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -206,7 +206,7 @@ public LogicalPlan buildLogicalPlanForPostCleanup(Resources resources) List operations = new ArrayList<>(); if (!transformWhileCopy) { - operations.add(Drop.of(true, tempDataset, true)); + operations.add(Drop.of(true, tempDataset, false)); } return LogicalPlan.of(operations); } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index ef0eb7990ed..4c695c7d6cb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -257,25 +257,8 @@ public Field createNewField(Field evolveTo, Field evolveFrom, Optional @Override public IngestorResult performBulkLoad(Datasets datasets, Executor executor, SqlPlan ingestSqlPlan, Map statisticsSqlPlan, Map placeHolderKeyValues) { - executor.executePhysicalPlan(ingestSqlPlan, placeHolderKeyValues); - - Map stats = new HashMap<>(); - StagedFilesDataset stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); - stats.put(StatisticName.FILES_LOADED, 
stagedFilesDataset.stagedFilesDatasetProperties().files().size()); // todo: check this - stats.put(StatisticName.ROWS_WITH_ERRORS, 0); // todo: check this - - SqlPlan rowsInsertedSqlPlan = statisticsSqlPlan.get(StatisticName.ROWS_INSERTED); - if (rowsInsertedSqlPlan != null) - { - stats.put(StatisticName.ROWS_INSERTED, executor.executePhysicalPlanAndGetResults(rowsInsertedSqlPlan, placeHolderKeyValues) - .stream() - .findFirst() - .map(TabularData::getData) - .flatMap(t -> t.stream().findFirst()) - .map(Map::values) - .flatMap(t -> t.stream().findFirst()) - .orElseThrow(IllegalStateException::new)); - } + BigQueryExecutor bigQueryExecutor = (BigQueryExecutor) executor; + Map stats = bigQueryExecutor.executeLoadPhysicalPlanAndGetStats(ingestSqlPlan, placeHolderKeyValues); IngestorResult result; result = IngestorResult.builder() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java index ddddb6a06e7..d2da804ec33 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java @@ -14,6 +14,7 @@ package org.finos.legend.engine.persistence.components.relational.bigquery.executor; +import 
org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.Executor; import org.finos.legend.engine.persistence.components.executor.RelationalExecutionHelper; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; @@ -70,6 +71,22 @@ public void executePhysicalPlan(SqlPlan physicalPlan, Map placeh } } + public Map executeLoadPhysicalPlanAndGetStats(SqlPlan physicalPlan, Map placeholderKeyValues) + { + List sqlList = physicalPlan.getSqlList(); + + // The first SQL is a load statement + // Executed in a new transaction + Map loadStats = bigQueryHelper.executeLoadStatement(getEnrichedSql(placeholderKeyValues, sqlList.get(0))); + + // The second SQL is an insert statement + // We need to first close the current transaction (if it exists) and open a new transaction + // Such that the result of the Load will be available to the Insert + bigQueryHelper.close(); + bigQueryHelper.executeStatement(getEnrichedSql(placeholderKeyValues, sqlList.get(1))); + return loadStats; + } + @Override public List executePhysicalPlanAndGetResults(SqlPlan physicalPlan) { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java index 84c5ed5f186..1336c32eda2 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java @@ -17,6 +17,7 @@ import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.FieldList; import com.google.cloud.bigquery.TableId; +import org.finos.legend.engine.persistence.components.common.StatisticName; import org.finos.legend.engine.persistence.components.executor.TypeMapping; import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.And; @@ -424,6 +425,34 @@ public List> executeQuery(String sql) } } + public Map executeLoadStatement(String sql) + { + BigQueryTransactionManager txManager = null; + try + { + txManager = new BigQueryTransactionManager(bigQuery); + return txManager.executeLoadStatement(sql); + } + catch (Exception e) + { + throw new RuntimeException("Error executing SQL query: " + sql, e); + } + finally + { + if (txManager != null) + { + try + { + txManager.close(); + } + catch (InterruptedException e) + { + LOGGER.error("Error closing transaction manager.", e); + } + } + } + } + @Override public void close() { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java index 7db1a4c5ad8..555f991ec95 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java @@ -22,8 +22,9 @@ import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobId; import com.google.cloud.bigquery.JobInfo; -import com.google.cloud.bigquery.LegacySQLTypeName; +import com.google.cloud.bigquery.JobStatistics; import com.google.cloud.bigquery.QueryJobConfiguration; +import org.finos.legend.engine.persistence.components.common.StatisticName; import java.util.ArrayList; import java.util.Arrays; @@ -98,6 +99,18 @@ public boolean executeInCurrentTransaction(String sql) throws InterruptedExcepti return job.getStatus().getError() == null; } + public Map executeLoadStatement(String sql) throws InterruptedException + { + Map stats = new HashMap<>(); + + Job job = this.executeSql(sql); + JobStatistics.QueryStatistics queryStatistics = job.getStatistics(); + + stats.put(StatisticName.ROWS_INSERTED, queryStatistics.getQueryPlan().get(0).getRecordsWritten()); + + return stats; + } + public List> convertResultSetToList(String sql) { try diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java index b693010cb79..d1fe2feaf82 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sqldom/schemaops/statements/CopyStatement.java @@ -19,7 +19,7 @@ import org.finos.legend.engine.persistence.components.relational.sqldom.SqlGen; import org.finos.legend.engine.persistence.components.relational.sqldom.common.Clause; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.expresssions.table.Table; -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DMLStatement; +import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.statements.DDLStatement; import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.Value; import java.util.ArrayList; @@ -33,7 +33,7 @@ import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.OPEN_PARENTHESIS; import static org.finos.legend.engine.persistence.components.relational.sqldom.utils.SqlGenUtils.WHITE_SPACE; -public class CopyStatement implements DMLStatement +public class CopyStatement implements DDLStatement { private Table table; private StagedFilesTable stagedFilesTable; diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java new file mode 100644 index 00000000000..88000f0f58d --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java @@ -0,0 +1,130 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.e2e; + +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; +import org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; +import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; +import org.finos.legend.engine.persistence.components.relational.bigquery.executor.BigQueryConnection; +import org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; 
+import java.util.Optional; + +import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; + +@Disabled +public class BulkLoadExecutorTest extends BigQueryEndToEndTest +{ + private static final String APPEND_TIME = "append_time"; + private static final String BATCH_ID = "batch_id"; + private static final String BATCH_ID_VALUE = "xyz123"; + private static final String col_int = "col_int"; + private static final String col_string = "col_string"; + private static final String col_decimal = "col_decimal"; + private static final String col_datetime = "col_datetime"; + private static final List file_list = Arrays.asList("the uri to the staged_file1.csv on GCS", "the uri to the staged_file2.csv on GCS", "the uri to the staged_file3.csv on GCS"); + private static Field col1 = Field.builder() + .name(col_int) + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .build(); + private static Field col2 = Field.builder() + .name(col_string) + .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) + .build(); + private static Field col3 = Field.builder() + .name(col_decimal) + .type(FieldType.of(DataType.DECIMAL, 5, 2)) + .build(); + private static Field col4 = Field.builder() + .name(col_datetime) + .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) + .build(); + + @Test + public void testMilestoning() throws IOException, InterruptedException + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) + .addAllFiles(file_list).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset 
mainDataset = DatasetDefinition.builder() + .group("demo").name("append_log") + .schema(SchemaDefinition.builder().build()) + .build(); + + BulkLoadMetadataDataset bulkLoadMetadataDataset = BulkLoadMetadataDataset.builder().group("demo").name("bulk_load_batch_metadata").build(); + + Datasets datasets = Datasets.builder().mainDataset(mainDataset).stagingDataset(stagedFilesDataset).bulkLoadMetadataDataset(bulkLoadMetadataDataset).build(); + + // Clean up + delete("demo", "main"); + delete("demo", "staging"); + delete("demo", "batch_metadata"); + delete("demo", "append_log"); + delete("demo", "bulk_load_batch_metadata"); + + + RelationalIngestor ingestor = RelationalIngestor.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .build(); + + RelationalConnection connection = BigQueryConnection.of(getBigQueryConnection()); + IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets); + + // Verify + List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); + String expectedPath = "src/test/resources/expected/bulk_load/expected_table1.csv"; + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID, APPEND_TIME}; + assertFileAndTableDataEquals(schema, expectedPath, tableData); + + long rowsInserted = (long) ingestorResult.statisticByName().get(ROWS_INSERTED); + Assertions.assertEquals(7, rowsInserted); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java new file mode 100644 index 00000000000..d94b84aa583 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java @@ -0,0 +1,138 @@ +// Copyright 2023 Goldman Sachs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.finos.legend.engine.persistence.components.e2e; + +import org.finos.legend.engine.persistence.components.common.Datasets; +import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; +import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DataType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.relational.api.GeneratorResult; +import org.finos.legend.engine.persistence.components.relational.api.RelationalGenerator; +import org.finos.legend.engine.persistence.components.relational.bigquery.BigQuerySink; +import org.finos.legend.engine.persistence.components.relational.bigquery.logicalplan.datasets.BigQueryStagedFilesDatasetProperties; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +@Disabled +public class BulkLoadGeneratorTest extends BigQueryEndToEndTest +{ + private static final String APPEND_TIME = "append_time"; + private static final String BATCH_ID 
= "batch_id"; + private static final String BATCH_ID_VALUE = "xyz123"; + private static final String col_int = "col_int"; + private static final String col_string = "col_string"; + private static final String col_decimal = "col_decimal"; + private static final String col_datetime = "col_datetime"; + private static final List file_list = Arrays.asList("the uri to the staged_file1.csv on GCS", "the uri to the staged_file2.csv on GCS", "the uri to the staged_file3.csv on GCS"); + private static Field col1 = Field.builder() + .name(col_int) + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .build(); + private static Field col2 = Field.builder() + .name(col_string) + .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) + .build(); + private static Field col3 = Field.builder() + .name(col_decimal) + .type(FieldType.of(DataType.DECIMAL, 5, 2)) + .build(); + private static Field col4 = Field.builder() + .name(col_datetime) + .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) + .build(); + + @Test + public void testMilestoning() throws IOException, InterruptedException + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) + .addAllFiles(file_list).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .group("demo").name("append_log") + .schema(SchemaDefinition.builder().build()) + .build(); + + BulkLoadMetadataDataset bulkLoadMetadataDataset = BulkLoadMetadataDataset.builder().group("demo").name("bulk_load_batch_metadata").build(); + + Datasets datasets = 
Datasets.builder().mainDataset(mainDataset).stagingDataset(stagedFilesDataset).bulkLoadMetadataDataset(bulkLoadMetadataDataset).build(); + + // Clean up + delete("demo", "main"); + delete("demo", "staging"); + delete("demo", "batch_metadata"); + delete("demo", "append_log"); + delete("demo", "bulk_load_batch_metadata"); + + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadBatchStatusPattern("{STATUS}") + .build(); + + GeneratorResult operations = generator.generateOperations(datasets); + List preActionsSqlList = operations.preActionsSql(); + List milestoningSqlList = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); + List postActionsSql = operations.postActionsSql(); + + List newMetadataIngestSql = new ArrayList<>(); + for (String metadataSql : metadataIngestSql) + { + String newSql = metadataSql.replace("{STATUS}", "SUCCEEDED"); + newMetadataIngestSql.add(newSql); + } + metadataIngestSql = newMetadataIngestSql; + + + ingest(preActionsSqlList, milestoningSqlList, metadataIngestSql, postActionsSql); + + // Verify + List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); + String expectedPath = "src/test/resources/expected/bulk_load/expected_table1.csv"; + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID, APPEND_TIME}; + assertFileAndTableDataEquals(schema, expectedPath, tableData); + } +} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv new file mode 100644 index 00000000000..259d7359904 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv @@ -0,0 +1,7 @@ +0,Candy,999.99,2022-01-15T00:00:00,xyz123,2000-01-01T00:00:00 +1,Andy,5.2,2022-01-11T00:00:00,xyz123,2000-01-01T00:00:00 +1,Andy,5.2,2022-01-11T00:00:00,xyz123,2000-01-01T00:00:00 +2,Bella,99.99,2022-01-12T00:00:00,xyz123,2000-01-01T00:00:00 +2,Bella,99.99,2022-01-12T00:00:00,xyz123,2000-01-01T00:00:00 +49,Sandy,123.45,2022-01-13T00:00:00,xyz123,2000-01-01T00:00:00 +50,Mindy,0,2022-01-14T00:00:00,xyz123,2000-01-01T00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file1.csv new file mode 100644 index 00000000000..dd2941bedb8 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file1.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file2.csv 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file2.csv new file mode 100644 index 00000000000..a4e5d3b6eb9 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file2.csv @@ -0,0 +1,3 @@ +1,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0 +50,Mindy,0.00,2022-01-14 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file3.csv new file mode 100644 index 00000000000..1ec00ee9883 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/staged_file3.csv @@ -0,0 +1 @@ +0,Candy,999.99,2022-01-15 00:00:00.0 \ No newline at end of file From e1a5f5d0ccca482f4c8a5aa40e2521aaf976a4ba Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 3 Oct 2023 11:10:16 +0800 Subject: [PATCH 54/57] Add rows with error handling and test --- .../relational/bigquery/BigQuerySink.java | 24 +++++-- .../executor/BigQueryTransactionManager.java | 6 +- .../components/e2e/BulkLoadExecutorTest.java | 66 +++++++++++++++++++ .../expected/bulk_load/expected_table2.csv | 4 ++ .../resources/input/bulk_load/bad_file.csv | 3 + 5 files changed, 96 insertions(+), 7 deletions(-) create mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv create mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/bad_file.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index 4c695c7d6cb..013a123a646 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -261,12 +261,24 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = bigQueryExecutor.executeLoadPhysicalPlanAndGetStats(ingestSqlPlan, placeHolderKeyValues); IngestorResult result; - result = IngestorResult.builder() - .status(IngestStatus.SUCCEEDED) - .updatedDatasets(datasets) - .putAllStatisticByName(stats) - .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) - .build(); + if ((long) stats.get(StatisticName.ROWS_WITH_ERRORS) == 0) + { + result = IngestorResult.builder() + .status(IngestStatus.SUCCEEDED) + .updatedDatasets(datasets) + .putAllStatisticByName(stats) + 
.ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) + .build(); + } + else + { + result = IngestorResult.builder() + .status(IngestStatus.FAILED) + .updatedDatasets(datasets) + .putAllStatisticByName(stats) + .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) + .build(); + } return result; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java index 555f991ec95..9f7b4e376a8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryTransactionManager.java @@ -106,7 +106,11 @@ public Map executeLoadStatement(String sql) throws Interr Job job = this.executeSql(sql); JobStatistics.QueryStatistics queryStatistics = job.getStatistics(); - stats.put(StatisticName.ROWS_INSERTED, queryStatistics.getQueryPlan().get(0).getRecordsWritten()); + long recordsWritten = queryStatistics.getQueryPlan().get(0).getRecordsWritten(); + long recordsRead = queryStatistics.getQueryPlan().get(0).getRecordsRead(); + + stats.put(StatisticName.ROWS_INSERTED, recordsWritten); + stats.put(StatisticName.ROWS_WITH_ERRORS, recordsRead - recordsWritten); return stats; } diff 
--git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java index 88000f0f58d..58bd32c51c0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java @@ -16,6 +16,7 @@ import org.finos.legend.engine.persistence.components.common.Datasets; import org.finos.legend.engine.persistence.components.common.FileFormat; +import org.finos.legend.engine.persistence.components.common.LoadOptions; import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; @@ -26,6 +27,7 @@ import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; import org.finos.legend.engine.persistence.components.logicalplan.datasets.SchemaDefinition; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDataset; +import org.finos.legend.engine.persistence.components.relational.api.IngestStatus; import org.finos.legend.engine.persistence.components.relational.api.IngestorResult; import org.finos.legend.engine.persistence.components.relational.api.RelationalConnection; import 
org.finos.legend.engine.persistence.components.relational.api.RelationalIngestor; @@ -44,6 +46,7 @@ import java.util.Optional; import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_INSERTED; +import static org.finos.legend.engine.persistence.components.common.StatisticName.ROWS_WITH_ERRORS; @Disabled public class BulkLoadExecutorTest extends BigQueryEndToEndTest @@ -56,6 +59,7 @@ public class BulkLoadExecutorTest extends BigQueryEndToEndTest private static final String col_decimal = "col_decimal"; private static final String col_datetime = "col_datetime"; private static final List file_list = Arrays.asList("the uri to the staged_file1.csv on GCS", "the uri to the staged_file2.csv on GCS", "the uri to the staged_file3.csv on GCS"); + private static final List bad_file_list = Arrays.asList("the uri to the bad_file.csv on GCS", "the uri to the staged_file1.csv on GCS"); private static Field col1 = Field.builder() .name(col_int) .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) @@ -125,6 +129,68 @@ public void testMilestoning() throws IOException, InterruptedException assertFileAndTableDataEquals(schema, expectedPath, tableData); long rowsInserted = (long) ingestorResult.statisticByName().get(ROWS_INSERTED); + long rowsWithErrors = (long) ingestorResult.statisticByName().get(ROWS_WITH_ERRORS); Assertions.assertEquals(7, rowsInserted); + Assertions.assertEquals(0, rowsWithErrors); + Assertions.assertEquals(IngestStatus.SUCCEEDED, ingestorResult.status()); + } + + @Test + public void testMilestoningFailure() throws IOException, InterruptedException + { + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(DateTimeAuditing.builder().dateTimeField(APPEND_TIME).build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + BigQueryStagedFilesDatasetProperties.builder() + 
.fileFormat(FileFormat.CSV) + .loadOptions(LoadOptions.builder().maxBadRecords(10L).build()) + .addAllFiles(bad_file_list).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .group("demo").name("append_log") + .schema(SchemaDefinition.builder().build()) + .build(); + + BulkLoadMetadataDataset bulkLoadMetadataDataset = BulkLoadMetadataDataset.builder().group("demo").name("bulk_load_batch_metadata").build(); + + Datasets datasets = Datasets.builder().mainDataset(mainDataset).stagingDataset(stagedFilesDataset).bulkLoadMetadataDataset(bulkLoadMetadataDataset).build(); + + // Clean up + delete("demo", "main"); + delete("demo", "staging"); + delete("demo", "batch_metadata"); + delete("demo", "append_log"); + delete("demo", "bulk_load_batch_metadata"); + + + RelationalIngestor ingestor = RelationalIngestor.builder() + .ingestMode(bulkLoad) + .relationalSink(BigQuerySink.get()) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .build(); + + RelationalConnection connection = BigQueryConnection.of(getBigQueryConnection()); + IngestorResult ingestorResult = ingestor.performFullIngestion(connection, datasets); + + // Verify + List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); + String expectedPath = "src/test/resources/expected/bulk_load/expected_table2.csv"; + String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID, APPEND_TIME}; + assertFileAndTableDataEquals(schema, expectedPath, tableData); + + long rowsInserted = (long) ingestorResult.statisticByName().get(ROWS_INSERTED); + long rowsWithErrors = (long) ingestorResult.statisticByName().get(ROWS_WITH_ERRORS); + Assertions.assertEquals(4, rowsInserted); + Assertions.assertEquals(2, rowsWithErrors); + Assertions.assertEquals(IngestStatus.FAILED, ingestorResult.status()); 
} } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv new file mode 100644 index 00000000000..4dfc256dd31 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv @@ -0,0 +1,4 @@ +1,Andy,5.2,2022-01-11T00:00:00,xyz123,2000-01-01T00:00:00 +2,Bella,99.99,2022-01-12T00:00:00,xyz123,2000-01-01T00:00:00 +11,Success,123.45,2022-01-13T00:00:00,xyz123,2000-01-01T00:00:00 +49,Sandy,123.45,2022-01-13T00:00:00,xyz123,2000-01-01T00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/bad_file.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/bad_file.csv new file mode 100644 index 00000000000..1c941007414 --- /dev/null +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/input/bulk_load/bad_file.csv @@ -0,0 +1,3 @@ +hello,Andy,5.20,2022-01-11 00:00:00.0 +2,Bella,99.99,2022-01-99 00:00:00.0 +11,Success,123.45,2022-01-13 00:00:00.0 \ No newline at end of file From 577082c072f1d0e01a658b9d91aee41b1ebc7c99 Mon Sep 17 00:00:00 2001 From: kumuwu Date: Tue, 3 Oct 2023 14:26:38 +0800 Subject: [PATCH 55/57] Address comments --- .../relational/bigquery/BigQuerySink.java | 16 +++++------ .../components/e2e/BulkLoadExecutorTest.java | 28 
+++++++++---------- .../components/e2e/BulkLoadGeneratorTest.java | 22 +++++++-------- .../components/ingestmode/BulkLoadTest.java | 21 +++++++------- .../ingestmode/bulkload/BulkLoadTest.java | 24 ++++++++-------- .../relational/snowflake/SnowflakeSink.java | 27 +++++++++--------- 6 files changed, 67 insertions(+), 71 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index 013a123a646..9ea03a4b618 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -260,26 +260,24 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = bigQueryExecutor.executeLoadPhysicalPlanAndGetStats(ingestSqlPlan, placeHolderKeyValues); + IngestorResult.Builder resultBuilder = IngestorResult.builder() + .updatedDatasets(datasets) + .putAllStatisticByName(stats) + .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)); IngestorResult result; + if ((long) stats.get(StatisticName.ROWS_WITH_ERRORS) == 0) { - result = IngestorResult.builder() + result = resultBuilder .status(IngestStatus.SUCCEEDED) - .updatedDatasets(datasets) - .putAllStatisticByName(stats) - 
.ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) .build(); } else { - result = IngestorResult.builder() + result = resultBuilder .status(IngestStatus.FAILED) - .updatedDatasets(datasets) - .putAllStatisticByName(stats) - .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) .build(); } - return result; } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java index 58bd32c51c0..803f17cd7e5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadExecutorTest.java @@ -54,26 +54,26 @@ public class BulkLoadExecutorTest extends BigQueryEndToEndTest private static final String APPEND_TIME = "append_time"; private static final String BATCH_ID = "batch_id"; private static final String BATCH_ID_VALUE = "xyz123"; - private static final String col_int = "col_int"; - private static final String col_string = "col_string"; - private static final String col_decimal = "col_decimal"; - private static final String col_datetime = "col_datetime"; - private static final List file_list = Arrays.asList("the uri to the staged_file1.csv on GCS", "the uri to the staged_file2.csv on GCS", "the uri to the staged_file3.csv on GCS"); - private static final List bad_file_list = 
Arrays.asList("the uri to the bad_file.csv on GCS", "the uri to the staged_file1.csv on GCS"); + private static final String COL_INT = "col_int"; + private static final String COL_STRING = "col_string"; + private static final String COL_DECIMAL = "col_decimal"; + private static final String COL_DATETIME = "col_datetime"; + private static final List FILE_LIST = Arrays.asList("the uri to the staged_file1.csv on GCS", "the uri to the staged_file2.csv on GCS", "the uri to the staged_file3.csv on GCS"); + private static final List BAD_FILE_LIST = Arrays.asList("the uri to the bad_file.csv on GCS", "the uri to the staged_file1.csv on GCS"); private static Field col1 = Field.builder() - .name(col_int) + .name(COL_INT) .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) .build(); private static Field col2 = Field.builder() - .name(col_string) + .name(COL_STRING) .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) .build(); private static Field col3 = Field.builder() - .name(col_decimal) + .name(COL_DECIMAL) .type(FieldType.of(DataType.DECIMAL, 5, 2)) .build(); private static Field col4 = Field.builder() - .name(col_datetime) + .name(COL_DATETIME) .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) .build(); @@ -90,7 +90,7 @@ public void testMilestoning() throws IOException, InterruptedException .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() .fileFormat(FileFormat.CSV) - .addAllFiles(file_list).build()) + .addAllFiles(FILE_LIST).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -125,7 +125,7 @@ public void testMilestoning() throws IOException, InterruptedException // Verify List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); String expectedPath = "src/test/resources/expected/bulk_load/expected_table1.csv"; - String[] schema = new String[]{col_int, col_string, col_decimal, 
col_datetime, BATCH_ID, APPEND_TIME}; + String[] schema = new String[]{COL_INT, COL_STRING, COL_DECIMAL, COL_DATETIME, BATCH_ID, APPEND_TIME}; assertFileAndTableDataEquals(schema, expectedPath, tableData); long rowsInserted = (long) ingestorResult.statisticByName().get(ROWS_INSERTED); @@ -149,7 +149,7 @@ public void testMilestoningFailure() throws IOException, InterruptedException BigQueryStagedFilesDatasetProperties.builder() .fileFormat(FileFormat.CSV) .loadOptions(LoadOptions.builder().maxBadRecords(10L).build()) - .addAllFiles(bad_file_list).build()) + .addAllFiles(BAD_FILE_LIST).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -184,7 +184,7 @@ public void testMilestoningFailure() throws IOException, InterruptedException // Verify List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); String expectedPath = "src/test/resources/expected/bulk_load/expected_table2.csv"; - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID, APPEND_TIME}; + String[] schema = new String[]{COL_INT, COL_STRING, COL_DECIMAL, COL_DATETIME, BATCH_ID, APPEND_TIME}; assertFileAndTableDataEquals(schema, expectedPath, tableData); long rowsInserted = (long) ingestorResult.statisticByName().get(ROWS_INSERTED); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java index d94b84aa583..8fe2a9c75cf 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/e2e/BulkLoadGeneratorTest.java @@ -47,25 +47,25 @@ public class BulkLoadGeneratorTest extends BigQueryEndToEndTest private static final String APPEND_TIME = "append_time"; private static final String BATCH_ID = "batch_id"; private static final String BATCH_ID_VALUE = "xyz123"; - private static final String col_int = "col_int"; - private static final String col_string = "col_string"; - private static final String col_decimal = "col_decimal"; - private static final String col_datetime = "col_datetime"; - private static final List file_list = Arrays.asList("the uri to the staged_file1.csv on GCS", "the uri to the staged_file2.csv on GCS", "the uri to the staged_file3.csv on GCS"); + private static final String COL_INT = "col_int"; + private static final String COL_STRING = "col_string"; + private static final String COL_DECIMAL = "col_decimal"; + private static final String COL_DATETIME = "col_datetime"; + private static final List FILE_LIST = Arrays.asList("the uri to the staged_file1.csv on GCS", "the uri to the staged_file2.csv on GCS", "the uri to the staged_file3.csv on GCS"); private static Field col1 = Field.builder() - .name(col_int) + .name(COL_INT) .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) .build(); private static Field col2 = Field.builder() - .name(col_string) + .name(COL_STRING) .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) .build(); private static Field col3 = Field.builder() - .name(col_decimal) + .name(COL_DECIMAL) .type(FieldType.of(DataType.DECIMAL, 5, 2)) .build(); private static Field 
col4 = Field.builder() - .name(col_datetime) + .name(COL_DATETIME) .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) .build(); @@ -82,7 +82,7 @@ public void testMilestoning() throws IOException, InterruptedException .stagedFilesDatasetProperties( BigQueryStagedFilesDatasetProperties.builder() .fileFormat(FileFormat.CSV) - .addAllFiles(file_list).build()) + .addAllFiles(FILE_LIST).build()) .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) .build(); @@ -132,7 +132,7 @@ public void testMilestoning() throws IOException, InterruptedException // Verify List> tableData = runQuery("select * from `demo`.`append_log` order by col_int asc"); String expectedPath = "src/test/resources/expected/bulk_load/expected_table1.csv"; - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID, APPEND_TIME}; + String[] schema = new String[]{COL_INT, COL_STRING, COL_DECIMAL, COL_DATETIME, BATCH_ID, APPEND_TIME}; assertFileAndTableDataEquals(schema, expectedPath, tableData); } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java index 5cef940d852..baa2b8410e0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/BulkLoadTest.java @@ -18,7 +18,6 @@ import org.finos.legend.engine.persistence.components.common.FileFormat; import org.finos.legend.engine.persistence.components.common.LoadOptions; import org.finos.legend.engine.persistence.components.common.StatisticName; -import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; import org.finos.legend.engine.persistence.components.ingestmode.audit.DateTimeAuditing; import org.finos.legend.engine.persistence.components.ingestmode.audit.NoAuditing; import org.finos.legend.engine.persistence.components.ingestmode.digest.NoDigestGenStrategy; @@ -58,32 +57,32 @@ public class BulkLoadTest private static final String DIGEST_UDF = "LAKEHOUSE_MD5"; private static final String BATCH_ID = "batch_id"; private static final String BATCH_ID_VALUE = "xyz123"; - private static final String col_int = "col_int"; - private static final String col_string = "col_string"; - private static final String col_decimal = "col_decimal"; - private static final String col_datetime = "col_datetime"; - private static final String col_variant = "col_variant"; + private static final String COL_INT = "col_int"; + private static final String COL_STRING = "col_string"; + private static final String COL_DECIMAL = "col_decimal"; + private static final String COL_DATETIME = "col_datetime"; + private static final String COL_VARIANT = "col_variant"; private static Field col1 = Field.builder() - .name(col_int) + .name(COL_INT) .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) .primaryKey(true) .build(); private static Field col2 = Field.builder() - .name(col_string) + .name(COL_STRING) .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) .build(); private static Field col3 = Field.builder() - .name(col_decimal) + 
.name(COL_DECIMAL) .type(FieldType.of(DataType.DECIMAL, 5, 2)) .build(); private static Field col4 = Field.builder() - .name(col_datetime) + .name(COL_DATETIME) .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) .build(); private static Field col5 = Field.builder() - .name(col_variant) + .name(COL_VARIANT) .type(FieldType.of(DataType.VARIANT, Optional.empty(), Optional.empty())) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java index a4da48a42c6..ed8654dcee4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/java/org/finos/legend/engine/persistence/components/ingestmode/bulkload/BulkLoadTest.java @@ -64,26 +64,26 @@ public class BulkLoadTest extends BaseTest private static final String DIGEST_UDF = "LAKEHOUSE_MD5"; private static final String BATCH_ID = "batch_id"; private static final String BATCH_ID_VALUE = "xyz123"; - private static final String col_int = "col_int"; - private static final String col_string = "col_string"; - private static final String col_decimal = "col_decimal"; - private static final String col_datetime = "col_datetime"; + private static final String COL_INT = "col_int"; + private static final String COL_STRING = "col_string"; + private static final String COL_DECIMAL = "col_decimal"; + private 
static final String COL_DATETIME = "col_datetime"; private static Field col1 = Field.builder() - .name(col_int) + .name(COL_INT) .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) .primaryKey(true) .build(); private static Field col2 = Field.builder() - .name(col_string) + .name(COL_STRING) .type(FieldType.of(DataType.STRING, Optional.empty(), Optional.empty())) .build(); private static Field col3 = Field.builder() - .name(col_decimal) + .name(COL_DECIMAL) .type(FieldType.of(DataType.DECIMAL, 5, 2)) .build(); private static Field col4 = Field.builder() - .name(col_datetime) + .name(COL_DATETIME) .type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())) .build(); @@ -147,7 +147,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID, APPEND_TIME}; + String[] schema = new String[]{COL_INT, COL_STRING, COL_DECIMAL, COL_DATETIME, BATCH_ID, APPEND_TIME}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); @@ -219,7 +219,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, col_decimal, col_datetime, BATCH_ID}; + String[] schema = new String[]{COL_INT, COL_STRING, COL_DECIMAL, COL_DATETIME, BATCH_ID}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.FILES_LOADED.name(), 1); @@ -293,7 +293,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int, col_string, 
col_decimal, col_datetime, DIGEST, BATCH_ID, APPEND_TIME}; + String[] schema = new String[]{COL_INT, COL_STRING, COL_DECIMAL, COL_DATETIME, DIGEST, BATCH_ID, APPEND_TIME}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); @@ -370,7 +370,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except // Verify execution using ingestor PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); - String[] schema = new String[]{col_int.toUpperCase(), col_string.toUpperCase(), col_decimal.toUpperCase(), col_datetime.toUpperCase(), DIGEST.toUpperCase(), BATCH_ID.toUpperCase(), APPEND_TIME.toUpperCase()}; + String[] schema = new String[]{COL_INT.toUpperCase(), COL_STRING.toUpperCase(), COL_DECIMAL.toUpperCase(), COL_DATETIME.toUpperCase(), DIGEST.toUpperCase(), BATCH_ID.toUpperCase(), APPEND_TIME.toUpperCase()}; Map expectedStats = new HashMap<>(); expectedStats.put(StatisticName.ROWS_INSERTED.name(), 3); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index 4ac0d7ab354..ea78819d696 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -256,31 
+256,30 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor stats = new HashMap<>(); stats.put(StatisticName.ROWS_INSERTED, totalRowsLoaded); stats.put(StatisticName.ROWS_WITH_ERRORS, totalRowsWithError); stats.put(StatisticName.FILES_LOADED, totalFilesLoaded); + IngestorResult.Builder resultBuilder = IngestorResult.builder() + .updatedDatasets(datasets) + .putAllStatisticByName(stats) + .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)); + IngestorResult result; + if (dataFilePathsWithFailedBulkLoad.isEmpty()) { - result = IngestorResult.builder() - .status(IngestStatus.SUCCEEDED) - .updatedDatasets(datasets) - .putAllStatisticByName(stats) - .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) - .build(); + result = resultBuilder + .status(IngestStatus.SUCCEEDED) + .build(); } else { String errorMessage = String.format("Unable to bulk load these files: %s", String.join(",", dataFilePathsWithFailedBulkLoad)); - result = IngestorResult.builder() - .status(IngestStatus.FAILED) - .message(errorMessage) - .updatedDatasets(datasets) - .putAllStatisticByName(stats) - .ingestionTimestampUTC(placeHolderKeyValues.get(BATCH_START_TS_PATTERN)) - .build(); + result = resultBuilder + .status(IngestStatus.FAILED) + .message(errorMessage) + .build(); } return result; } From 8d7316a21cce577cc97aefa470abda5a1dc0b491 Mon Sep 17 00:00:00 2001 From: Zhang Lizhi Date: Wed, 4 Oct 2023 16:41:02 +0800 Subject: [PATCH 56/57] Bulk Load Batch ID and Task ID & PK Validation (#11) * Add PK validation in bulk load * Resolve conflict * Remove unnecessary delete * Introduce bulk load batch id and bulk load task id * Rename variable --- .../DeriveMainDatasetSchemaFromStaging.java | 2 +- .../logicalplan/LogicalPlanFactory.java | 27 ++- .../values/BulkLoadBatchIdValueAbstract.java | 31 --- .../components/planner/BulkLoadPlanner.java | 35 ++- .../components/planner/Planner.java | 2 + .../planner/UnitemporalSnapshotPlanner.java | 2 +- 
.../util/BulkLoadMetadataDatasetAbstract.java | 2 +- .../util/BulkLoadMetadataUtils.java | 31 ++- .../transformer/AbstractTransformer.java | 1 - .../transformer/LogicalPlanVisitor.java | 2 - .../components/transformer/Transformer.java | 2 - .../relational/ansi/AnsiSqlSink.java | 3 - .../visitors/BulkLoadBatchIdValueVisitor.java | 31 --- .../util/BulkLoadDatasetUtilsAnsiTest.java | 4 +- .../util/BulkLoadDatasetUtilsTest.java | 1 - .../relational/bigquery/BigQuerySink.java | 4 +- .../components/e2e/BulkLoadExecutorTest.java | 5 +- .../components/e2e/BulkLoadGeneratorTest.java | 4 +- .../components/ingestmode/BulkLoadTest.java | 48 ++-- .../BulkLoadDatasetUtilsBigQueryTest.java | 4 +- .../expected/bulk_load/expected_table1.csv | 14 +- .../expected/bulk_load/expected_table2.csv | 8 +- .../api/RelationalGeneratorAbstract.java | 8 +- .../api/RelationalIngestorAbstract.java | 18 +- .../components/relational/h2/H2Sink.java | 2 + .../ingestmode/bulkload/BulkLoadTest.java | 228 +++++++++++++++--- .../bulk-load/expected/expected_table1.csv | 6 +- .../bulk-load/expected/expected_table2.csv | 6 +- .../bulk-load/expected/expected_table3.csv | 6 +- .../bulk-load/expected/expected_table4.csv | 6 +- .../bulk-load/expected/expected_table5.csv | 9 +- .../data/bulk-load/input/staged_file5.csv | 3 - .../relational/snowflake/SnowflakeSink.java | 4 +- .../components/ingestmode/BulkLoadTest.java | 51 ++-- .../BulkLoadDatasetUtilsSnowflakeTest.java | 4 +- 35 files changed, 379 insertions(+), 235 deletions(-) delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java delete mode 100644 
legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java delete mode 100644 legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java index cf333ca3b41..b92a06436fc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/ingestmode/DeriveMainDatasetSchemaFromStaging.java @@ -149,7 +149,7 @@ public Dataset visitBulkLoad(BulkLoadAbstract bulkLoad) } Field batchIdField = Field.builder() .name(bulkLoad.batchIdField()) - .type(FieldType.of(DataType.VARCHAR, Optional.empty(), Optional.empty())) + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) .primaryKey(false) .build(); mainSchemaFields.add(batchIdField); diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java index 33ae2fcad34..b16e938fbc8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/LogicalPlanFactory.java @@ -15,7 +15,8 @@ package org.finos.legend.engine.persistence.components.logicalplan; import org.finos.legend.engine.persistence.components.common.Datasets; -import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; +import org.finos.legend.engine.persistence.components.ingestmode.BulkLoad; +import org.finos.legend.engine.persistence.components.ingestmode.IngestMode; import org.finos.legend.engine.persistence.components.logicalplan.datasets.CsvExternalDatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; @@ -32,12 +33,13 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.logicalplan.values.TabularValues; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; +import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; +import 
org.finos.legend.engine.persistence.components.util.BulkLoadMetadataUtils; import org.finos.legend.engine.persistence.components.util.LogicalPlanUtils; import org.finos.legend.engine.persistence.components.util.MetadataDataset; import org.finos.legend.engine.persistence.components.util.MetadataUtils; import java.util.List; -import java.util.Optional; public class LogicalPlanFactory { @@ -91,14 +93,23 @@ public static LogicalPlan getLogicalPlanForConstantStats(String stats, Long valu .build(); } - public static LogicalPlan getLogicalPlanForNextBatchId(Datasets datasets) + public static LogicalPlan getLogicalPlanForNextBatchId(Datasets datasets, IngestMode ingestMode) { StringValue mainTable = StringValue.of(datasets.mainDataset().datasetReference().name().orElseThrow(IllegalStateException::new)); - MetadataDataset metadataDataset = datasets.metadataDataset().isPresent() - ? datasets.metadataDataset().get() - : MetadataDataset.builder().build(); - MetadataUtils metadataUtils = new MetadataUtils(metadataDataset); - Selection selection = metadataUtils.getBatchId(mainTable).selection(); + Selection selection; + if (ingestMode instanceof BulkLoad) + { + BulkLoadMetadataDataset bulkLoadMetadataDataset = datasets.bulkLoadMetadataDataset().orElse(BulkLoadMetadataDataset.builder().build()); + BulkLoadMetadataUtils bulkLoadMetadataUtils = new BulkLoadMetadataUtils(bulkLoadMetadataDataset); + selection = bulkLoadMetadataUtils.getBatchId(mainTable).selection(); + } + else + { + MetadataDataset metadataDataset = datasets.metadataDataset().orElse(MetadataDataset.builder().build()); + MetadataUtils metadataUtils = new MetadataUtils(metadataDataset); + selection = metadataUtils.getBatchId(mainTable).selection(); + } + return LogicalPlan.builder().addOps(selection).build(); } diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java deleted file mode 100644 index 9e7b5001aad..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/logicalplan/values/BulkLoadBatchIdValueAbstract.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package org.finos.legend.engine.persistence.components.logicalplan.values; - -import static org.immutables.value.Value.Immutable; -import static org.immutables.value.Value.Style; - -@Immutable -@Style( - typeAbstract = "*Abstract", - typeImmutable = "*", - jdkOnly = true, - optionalAcceptNullable = true, - strictBuilder = true -) -public interface BulkLoadBatchIdValueAbstract extends Value -{ - BulkLoadBatchIdValue INSTANCE = BulkLoadBatchIdValue.builder().build(); -} diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java index 6d2c38027c3..8a49dd3a5fe 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/BulkLoadPlanner.java @@ -27,8 +27,8 @@ import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetDefinition; +import org.finos.legend.engine.persistence.components.logicalplan.datasets.Field; import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesSelection; -import org.finos.legend.engine.persistence.components.logicalplan.operations.Delete; import 
org.finos.legend.engine.persistence.components.logicalplan.operations.Drop; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; @@ -46,7 +46,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; -import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataDataset; import org.finos.legend.engine.persistence.components.util.BulkLoadMetadataUtils; import org.finos.legend.engine.persistence.components.util.Capability; @@ -66,17 +65,20 @@ class BulkLoadPlanner extends Planner private Dataset tempDataset; private StagedFilesDataset stagedFilesDataset; private BulkLoadMetadataDataset bulkLoadMetadataDataset; + private Optional bulkLoadTaskIdValue; BulkLoadPlanner(Datasets datasets, BulkLoad ingestMode, PlannerOptions plannerOptions, Set capabilities) { super(datasets, ingestMode, plannerOptions, capabilities); // validation + validateNoPrimaryKeysInStageAndMain(); if (!(datasets.stagingDataset() instanceof StagedFilesDataset)) { throw new IllegalArgumentException("Only StagedFilesDataset are allowed under Bulk Load"); } + bulkLoadTaskIdValue = plannerOptions.bulkLoadTaskIdValue(); stagedFilesDataset = (StagedFilesDataset) datasets.stagingDataset(); bulkLoadMetadataDataset = bulkLoadMetadataDataset().orElseThrow(IllegalStateException::new); @@ -93,6 +95,15 @@ class BulkLoadPlanner extends Planner } } + private void validateNoPrimaryKeysInStageAndMain() + { + List primaryKeysFromMain = mainDataset().schema().fields().stream().filter(Field::primaryKey).map(Field::name).collect(Collectors.toList()); + 
validatePrimaryKeysIsEmpty(primaryKeysFromMain); + + List primaryKeysFromStage = stagingDataset().schema().fields().stream().filter(Field::primaryKey).map(Field::name).collect(Collectors.toList()); + validatePrimaryKeysIsEmpty(primaryKeysFromStage); + } + @Override protected BulkLoad ingestMode() { @@ -122,7 +133,7 @@ private LogicalPlan buildLogicalPlanForTransformWhileCopy(Resources resources) // Add batch_id field fieldsToInsert.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); - fieldsToSelect.add(BulkLoadBatchIdValue.INSTANCE); + fieldsToSelect.add(new BulkLoadMetadataUtils(bulkLoadMetadataDataset).getBatchId(StringValue.of(mainDataset().datasetReference().name().orElseThrow(IllegalStateException::new)))); // Add auditing if (ingestMode().auditing().accept(AUDIT_ENABLED)) @@ -146,23 +157,23 @@ private LogicalPlan buildLogicalPlanForCopyAndTransform(Resources resources) // Operation 2: Transfer from temp table into target table, adding extra columns at the same time - List fieldsToSelectFromTemp = new ArrayList<>(tempDataset.schemaReference().fieldValues()); + List fieldsToSelect = new ArrayList<>(tempDataset.schemaReference().fieldValues()); List fieldsToInsertIntoMain = new ArrayList<>(tempDataset.schemaReference().fieldValues()); // Add digest - ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), tempDataset, fieldsToSelectFromTemp, fieldsToInsertIntoMain)); + ingestMode().digestGenStrategy().accept(new DigestGeneration(mainDataset(), tempDataset, fieldsToSelect, fieldsToInsertIntoMain)); // Add batch_id field fieldsToInsertIntoMain.add(FieldValue.builder().datasetRef(mainDataset().datasetReference()).fieldName(ingestMode().batchIdField()).build()); - fieldsToSelectFromTemp.add(BulkLoadBatchIdValue.INSTANCE); + fieldsToSelect.add(new 
BulkLoadMetadataUtils(bulkLoadMetadataDataset).getBatchId(StringValue.of(mainDataset().datasetReference().name().orElseThrow(IllegalStateException::new)))); // Add auditing if (ingestMode().auditing().accept(AUDIT_ENABLED)) { - addAuditing(fieldsToInsertIntoMain, fieldsToSelectFromTemp); + addAuditing(fieldsToInsertIntoMain, fieldsToSelect); } - operations.add(Insert.of(mainDataset(), Selection.builder().source(tempDataset).addAllFields(fieldsToSelectFromTemp).build(), fieldsToInsertIntoMain)); + operations.add(Insert.of(mainDataset(), Selection.builder().source(tempDataset).addAllFields(fieldsToSelect).build(), fieldsToInsertIntoMain)); return LogicalPlan.of(operations); @@ -192,11 +203,8 @@ public LogicalPlan buildLogicalPlanForPreActions(Resources resources) @Override public LogicalPlan buildLogicalPlanForPostActions(Resources resources) { + // there is no need to delete from the temp table for big query because we always use "overwrite" when loading List operations = new ArrayList<>(); - if (!transformWhileCopy) - { - operations.add(Delete.builder().dataset(tempDataset).build()); - } return LogicalPlan.of(operations); } @@ -251,9 +259,10 @@ private Selection getRowsBasedOnAppendTimestamp(Dataset dataset, String field, S private String jsonifyBatchSourceInfo(StagedFilesDatasetProperties stagedFilesDatasetProperties) { - List files = stagedFilesDatasetProperties.files(); Map batchSourceMap = new HashMap(); + List files = stagedFilesDatasetProperties.files(); batchSourceMap.put("files", files); + bulkLoadTaskIdValue.ifPresent(taskId -> batchSourceMap.put("task_id", taskId)); ObjectMapper objectMapper = new ObjectMapper(); try { diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java index a083e1dd370..6123b86eec8 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/Planner.java @@ -97,6 +97,8 @@ default boolean enableConcurrentSafety() { return false; } + + Optional bulkLoadTaskIdValue(); } private final Datasets datasets; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java index 007e3f3054e..da241423103 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/planner/UnitemporalSnapshotPlanner.java @@ -177,7 +177,7 @@ protected Insert sqlToUpsertRows() sink."batch_id_out" = 999999999 and not exists ( - sink."digest" <> stage."digest" and sink.primaryKeys = stage.primaryKeys + sink."digest" = stage."digest" and sink.primaryKeys = 
stage.primaryKeys ) Partition : diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataDatasetAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataDatasetAbstract.java index 65054f86e39..3cfea00878f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataDatasetAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataDatasetAbstract.java @@ -95,7 +95,7 @@ default Dataset get() .group(group()) .name(name()) .schema(SchemaDefinition.builder() - .addFields(Field.builder().name(batchIdField()).type(FieldType.of(DataType.VARCHAR, 255, null)).build()) + .addFields(Field.builder().name(batchIdField()).type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())).build()) .addFields(Field.builder().name(tableNameField()).type(FieldType.of(DataType.VARCHAR, 255, null)).build()) .addFields(Field.builder().name(batchStartTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) .addFields(Field.builder().name(batchEndTimeField()).type(FieldType.of(DataType.DATETIME, Optional.empty(), Optional.empty())).build()) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataUtils.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataUtils.java index 161a25e345b..0ff58bbbcdc 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataUtils.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/util/BulkLoadMetadataUtils.java @@ -14,17 +14,23 @@ package org.finos.legend.engine.persistence.components.util; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Condition; +import org.finos.legend.engine.persistence.components.logicalplan.conditions.Equals; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; import org.finos.legend.engine.persistence.components.logicalplan.datasets.DatasetReference; import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; import org.finos.legend.engine.persistence.components.logicalplan.operations.Insert; +import org.finos.legend.engine.persistence.components.logicalplan.values.BatchIdValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionImpl; +import org.finos.legend.engine.persistence.components.logicalplan.values.FunctionName; +import org.finos.legend.engine.persistence.components.logicalplan.values.NumericalValue; import org.finos.legend.engine.persistence.components.logicalplan.values.StringValue; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import 
org.finos.legend.engine.persistence.components.logicalplan.values.FieldValue; +import org.finos.legend.engine.persistence.components.logicalplan.values.SumBinaryValueOperator; import org.finos.legend.engine.persistence.components.logicalplan.values.Value; import org.finos.legend.engine.persistence.components.logicalplan.values.ParseJsonFunction; -import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; import java.util.ArrayList; @@ -41,6 +47,27 @@ public BulkLoadMetadataUtils(BulkLoadMetadataDataset bulkLoadMetadataDataset) this.dataset = bulkLoadMetadataDataset.get(); } + /* + SELECT COALESCE(MAX("table_batch_id"),0)+1 FROM batch_metadata WHERE "table_name" = mainTableName + */ + public BatchIdValue getBatchId(StringValue mainTableName) + { + FieldValue tableNameFieldValue = FieldValue.builder().datasetRef(dataset.datasetReference()).fieldName(bulkLoadMetadataDataset.tableNameField()).build(); + FunctionImpl tableNameInUpperCase = FunctionImpl.builder().functionName(FunctionName.UPPER).addValue(tableNameFieldValue).build(); + StringValue mainTableNameInUpperCase = StringValue.builder().value(mainTableName.value().map(field -> field.toUpperCase())) + .alias(mainTableName.alias()).build(); + Condition whereCondition = Equals.of(tableNameInUpperCase, mainTableNameInUpperCase); + FieldValue tableBatchIdFieldValue = FieldValue.builder().datasetRef(dataset.datasetReference()).fieldName(bulkLoadMetadataDataset.batchIdField()).build(); + FunctionImpl maxBatchId = FunctionImpl.builder().functionName(FunctionName.MAX).addValue(tableBatchIdFieldValue).build(); + FunctionImpl coalesce = FunctionImpl.builder().functionName(FunctionName.COALESCE).addValue(maxBatchId, NumericalValue.of(0L)).build(); + + return BatchIdValue.of(Selection.builder() + .source(dataset) + .condition(whereCondition) + .addFields(SumBinaryValueOperator.of(coalesce, 
NumericalValue.of(1L))) + .build()); + } + /* INSERT INTO batch_metadata ("batchIdField", "tableNameField", "batchStartTimeField", "batchEndTimeField", "batchStatusField","batchSourceInfoField") @@ -63,7 +90,7 @@ public Insert insertMetaData(StringValue tableNameValue, StringValue batchSource List metaSelectFields = new ArrayList<>(); metaInsertFields.add(batchId); - metaSelectFields.add(BulkLoadBatchIdValue.INSTANCE); + metaSelectFields.add(getBatchId(tableNameValue)); metaInsertFields.add(tableName); metaSelectFields.add(tableNameValue); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java index e9cab94d2d7..7f7e667044f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/AbstractTransformer.java @@ -69,7 +69,6 @@ protected VisitorContext createContext(TransformOptions options) .batchStartTimestamp(options.batchStartTimestampValue()) .batchIdPattern(options.batchIdPattern()) .infiniteBatchIdValue(options.infiniteBatchIdValue()) - .bulkLoadBatchIdValue(options.bulkLoadBatchIdValue()) .bulkLoadBatchStatusPattern(options.bulkLoadBatchStatusPattern()) .addAllOptimizers(options.optimizers()) .quoteIdentifier(sink.quoteIdentifier()) diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java index 7b59312d55f..5801b636aa0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/LogicalPlanVisitor.java @@ -51,8 +51,6 @@ interface VisitorContextAbstract Optional infiniteBatchIdValue(); - Optional bulkLoadBatchIdValue(); - Optional bulkLoadBatchStatusPattern(); List optimizers(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java index be6f7a440d6..3e663deb408 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-physical-plan/src/main/java/org/finos/legend/engine/persistence/components/transformer/Transformer.java @@ -57,8 +57,6 @@ public Clock executionTimestampClock() public abstract Optional infiniteBatchIdValue(); - public abstract Optional bulkLoadBatchIdValue(); - public abstract Optional bulkLoadBatchStatusPattern(); public abstract List optimizers(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java index 63a2d109bb4..f7e3d5e6ac4 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/AnsiSqlSink.java @@ -61,7 +61,6 @@ import org.finos.legend.engine.persistence.components.logicalplan.values.BatchEndTimestamp; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchIdValue; import org.finos.legend.engine.persistence.components.logicalplan.values.BatchStartTimestamp; -import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchStatusValue; import org.finos.legend.engine.persistence.components.logicalplan.values.Case; import 
org.finos.legend.engine.persistence.components.logicalplan.values.DatetimeValue; @@ -95,7 +94,6 @@ import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BatchEndTimestampVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BatchIdValueVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BatchStartTimestampVisitor; -import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BulkLoadBatchIdValueVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.BulkLoadBatchStatusValueVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.CaseVisitor; import org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors.DatasetAdditionalPropertiesVisitor; @@ -235,7 +233,6 @@ public class AnsiSqlSink extends RelationalSink logicalPlanVisitorByClass.put(Show.class, new ShowVisitor()); logicalPlanVisitorByClass.put(BatchIdValue.class, new BatchIdValueVisitor()); logicalPlanVisitorByClass.put(InfiniteBatchIdValue.class, new InfiniteBatchIdValueVisitor()); - logicalPlanVisitorByClass.put(BulkLoadBatchIdValue.class, new BulkLoadBatchIdValueVisitor()); logicalPlanVisitorByClass.put(BulkLoadBatchStatusValue.class, new BulkLoadBatchStatusValueVisitor()); LOGICAL_PLAN_VISITOR_BY_CLASS = Collections.unmodifiableMap(logicalPlanVisitorByClass); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java deleted file mode 100644 index 
faf24aac182..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/main/java/org/finos/legend/engine/persistence/components/relational/ansi/sql/visitors/BulkLoadBatchIdValueVisitor.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2023 Goldman Sachs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.finos.legend.engine.persistence.components.relational.ansi.sql.visitors; - -import org.finos.legend.engine.persistence.components.logicalplan.values.BulkLoadBatchIdValue; -import org.finos.legend.engine.persistence.components.physicalplan.PhysicalPlanNode; -import org.finos.legend.engine.persistence.components.relational.sqldom.schemaops.values.StringValue; -import org.finos.legend.engine.persistence.components.transformer.LogicalPlanVisitor; -import org.finos.legend.engine.persistence.components.transformer.VisitorContext; - -public class BulkLoadBatchIdValueVisitor implements LogicalPlanVisitor -{ - @Override - public VisitorResult visit(PhysicalPlanNode prev, BulkLoadBatchIdValue current, VisitorContext context) - { - prev.push(new StringValue(context.bulkLoadBatchIdValue().orElseThrow(IllegalStateException::new), context.quoteIdentifier())); - return new VisitorResult(); - } -} diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java index 67a3337de07..2a6d727b17f 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsAnsiTest.java @@ -24,13 +24,13 @@ public String getExpectedSqlForMetadata() { return "INSERT INTO bulk_load_batch_metadata " + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\")" + - " (SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; + " (SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; } public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT 
(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"TABLE_NAME\") = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',CURRENT_TIMESTAMP(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java index 65e5861a277..6e563621a28 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-ansi/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsTest.java @@ -37,7 +37,6 @@ public abstract class BulkLoadDatasetUtilsTest private final TransformOptions transformOptions = TransformOptions .builder() .executionTimestampClock(Clock.fixed(executionZonedDateTime.toInstant(), ZoneOffset.UTC)) - .bulkLoadBatchIdValue("batch_id_123") .bulkLoadBatchStatusPattern("") .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java index 9ea03a4b618..62c3df96ff0 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/BigQuerySink.java @@ -82,6 +82,7 @@ import java.util.Optional; import java.util.Set; +import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; public class BigQuerySink extends AnsiSqlSink @@ -263,7 +264,8 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor preActionsSql = operations.preActionsSql(); List ingestSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + - "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` STRING,`append_time` DATETIME)"; + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64,`append_time` DATETIME)"; String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` 
JSON) " + @@ -136,12 +137,16 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,{NEXT_BATCH_ID},PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + + "(SELECT {NEXT_BATCH_ID},'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"xyz123\"}'))"; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); Assertions.assertEquals(expectedInsertSql, ingestSql.get(1)); + Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); @@ -150,7 +155,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledNoExtraOptions() } @Test - public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() + public void 
testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptionsNoTaskId() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField(BATCH_ID) @@ -185,17 +190,17 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); List preActionsSql = operations.preActionsSql(); List ingestSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + - "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` STRING,`append_time` DATETIME)"; + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64,`append_time` DATETIME)"; String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + @@ -204,12 +209,17 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabledAllOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT 
legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; + String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),'my_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}'," + + "PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedCopySql, ingestSql.get(0)); Assertions.assertEquals(expectedInsertSql, ingestSql.get(1)); + Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); Assertions.assertEquals("SELECT 0 as `rowsDeleted`", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as `rowsTerminated`", statsSql.get(ROWS_TERMINATED)); @@ -244,7 +254,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE) .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -254,7 +264,7 @@ public void 
testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + - "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` STRING)"; + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`batch_id` INT64)"; String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + @@ -262,7 +272,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabledNoExtraOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `batch_id`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,'xyz123' " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME') " + "FROM `my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -302,7 +312,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE) .build(); GeneratorResult 
operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -312,7 +322,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `my_db`.`my_name`" + - "(`col_int` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`digest` STRING,`batch_id` STRING,`append_time` DATETIME)"; + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON,`digest` STRING,`batch_id` INT64,`append_time` DATETIME)"; String expectedCopySql = "LOAD DATA OVERWRITE `my_db`.`my_name_legend_persistence_temp` " + "(`col_int` INT64,`col_string` STRING,`col_decimal` NUMERIC(5,2),`col_datetime` DATETIME,`col_variant` JSON) " + @@ -320,7 +330,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptions() String expectedInsertSql = "INSERT INTO `my_db`.`my_name` " + "(`col_int`, `col_string`, `col_decimal`, `col_datetime`, `col_variant`, `digest`, `batch_id`, `append_time`) " + - "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT legend_persistence_temp.`col_int`,legend_persistence_temp.`col_string`,legend_persistence_temp.`col_decimal`,legend_persistence_temp.`col_datetime`,legend_persistence_temp.`col_variant`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + "FROM 
`my_db`.`my_name_legend_persistence_temp` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -360,7 +370,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() .relationalSink(BigQuerySink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -371,7 +381,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS `MY_DB`.`MY_NAME`" + - "(`COL_INT` INT64 NOT NULL PRIMARY KEY NOT ENFORCED,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC(5,2),`COL_DATETIME` DATETIME,`COL_VARIANT` JSON,`DIGEST` STRING,`BATCH_ID` STRING,`APPEND_TIME` DATETIME)"; + "(`COL_INT` INT64,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC(5,2),`COL_DATETIME` DATETIME,`COL_VARIANT` JSON,`DIGEST` STRING,`BATCH_ID` INT64,`APPEND_TIME` DATETIME)"; String expectedCopySql = "LOAD DATA OVERWRITE `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` " + "(`COL_INT` INT64,`COL_STRING` STRING,`COL_DECIMAL` NUMERIC(5,2),`COL_DATETIME` DATETIME,`COL_VARIANT` JSON) " + @@ -379,7 +389,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledNoExtraOptionsUpperCase() String expectedInsertSql = "INSERT INTO `MY_DB`.`MY_NAME` " + "(`COL_INT`, `COL_STRING`, `COL_DECIMAL`, `COL_DATETIME`, `COL_VARIANT`, `DIGEST`, `BATCH_ID`, `APPEND_TIME`) " + - "(SELECT legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),'xyz123',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + + "(SELECT 
legend_persistence_temp.`COL_INT`,legend_persistence_temp.`COL_STRING`,legend_persistence_temp.`COL_DECIMAL`,legend_persistence_temp.`COL_DATETIME`,legend_persistence_temp.`COL_VARIANT`,LAKEHOUSE_MD5(TO_JSON(legend_persistence_temp)),(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.`TABLE_NAME`) = 'MY_NAME'),PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00') " + "FROM `MY_DB`.`MY_NAME_LEGEND_PERSISTENCE_TEMP` as legend_persistence_temp)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -452,7 +462,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(BigQuerySink.get()) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java index efcf49965fc..739b22c7274 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsBigQueryTest.java @@ -24,14 +24,14 @@ 
public String getExpectedSqlForMetadata() { return "INSERT INTO bulk_load_batch_metadata " + "(`batch_id`, `table_name`, `batch_start_ts_utc`, `batch_end_ts_utc`, `batch_status`, `batch_source_info`) " + - "(SELECT 'batch_id_123','appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`batch_id`),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`table_name`) = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA " + "(`BATCH_ID`, `TABLE_NAME`, `BATCH_START_TS_UTC`, `BATCH_END_TS_UTC`, `BATCH_STATUS`, `BATCH_SOURCE_INFO`) " + - "(SELECT 'batch_id_123','BULK_LOAD_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.`BATCH_ID`),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.`TABLE_NAME`) = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME',PARSE_DATETIME('%Y-%m-%d %H:%M:%S','2000-01-01 00:00:00'),CURRENT_DATETIME(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv index 259d7359904..e7a4d4b5f4b 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table1.csv @@ -1,7 +1,7 @@ -0,Candy,999.99,2022-01-15T00:00:00,xyz123,2000-01-01T00:00:00 -1,Andy,5.2,2022-01-11T00:00:00,xyz123,2000-01-01T00:00:00 -1,Andy,5.2,2022-01-11T00:00:00,xyz123,2000-01-01T00:00:00 -2,Bella,99.99,2022-01-12T00:00:00,xyz123,2000-01-01T00:00:00 -2,Bella,99.99,2022-01-12T00:00:00,xyz123,2000-01-01T00:00:00 -49,Sandy,123.45,2022-01-13T00:00:00,xyz123,2000-01-01T00:00:00 -50,Mindy,0,2022-01-14T00:00:00,xyz123,2000-01-01T00:00:00 \ No newline at end of file +0,Candy,999.99,2022-01-15T00:00:00,1,2000-01-01T00:00:00 +1,Andy,5.2,2022-01-11T00:00:00,1,2000-01-01T00:00:00 +1,Andy,5.2,2022-01-11T00:00:00,1,2000-01-01T00:00:00 +2,Bella,99.99,2022-01-12T00:00:00,1,2000-01-01T00:00:00 +2,Bella,99.99,2022-01-12T00:00:00,1,2000-01-01T00:00:00 +49,Sandy,123.45,2022-01-13T00:00:00,1,2000-01-01T00:00:00 +50,Mindy,0,2022-01-14T00:00:00,1,2000-01-01T00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv index 4dfc256dd31..c1da46d0fb6 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv +++ 
b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/test/resources/expected/bulk_load/expected_table2.csv @@ -1,4 +1,4 @@ -1,Andy,5.2,2022-01-11T00:00:00,xyz123,2000-01-01T00:00:00 -2,Bella,99.99,2022-01-12T00:00:00,xyz123,2000-01-01T00:00:00 -11,Success,123.45,2022-01-13T00:00:00,xyz123,2000-01-01T00:00:00 -49,Sandy,123.45,2022-01-13T00:00:00,xyz123,2000-01-01T00:00:00 \ No newline at end of file +1,Andy,5.2,2022-01-11T00:00:00,1,2000-01-01T00:00:00 +2,Bella,99.99,2022-01-12T00:00:00,1,2000-01-01T00:00:00 +11,Success,123.45,2022-01-13T00:00:00,1,2000-01-01T00:00:00 +49,Sandy,123.45,2022-01-13T00:00:00,1,2000-01-01T00:00:00 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java index 24689071f88..e597d6451bb 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalGeneratorAbstract.java @@ -114,11 +114,7 @@ public boolean enableConcurrentSafety() public abstract Optional infiniteBatchIdValue(); - @Default - public String bulkLoadBatchIdValue() - { - return UUID.randomUUID().toString(); - } + public abstract Optional bulkLoadTaskIdValue(); 
@Default public String bulkLoadBatchStatusPattern() @@ -141,6 +137,7 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) + .bulkLoadTaskIdValue(bulkLoadTaskIdValue()) .build(); } @@ -152,7 +149,6 @@ protected TransformOptions transformOptions() .batchStartTimestampPattern(batchStartTimestampPattern()) .batchEndTimestampPattern(batchEndTimestampPattern()) .infiniteBatchIdValue(infiniteBatchIdValue()) - .bulkLoadBatchIdValue(bulkLoadBatchIdValue()) .bulkLoadBatchStatusPattern(bulkLoadBatchStatusPattern()) .batchIdPattern(batchIdPattern()); diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java index c2d31164dfe..c7f1f7ab612 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-core/src/main/java/org/finos/legend/engine/persistence/components/relational/api/RelationalIngestorAbstract.java @@ -77,7 +77,6 @@ import java.util.Arrays; import java.util.Set; import java.util.stream.Collectors; -import java.util.UUID; import static org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MAX_OF_FIELD; import static 
org.finos.legend.engine.persistence.components.logicalplan.LogicalPlanFactory.MIN_OF_FIELD; @@ -98,8 +97,8 @@ public abstract class RelationalIngestorAbstract private static final String STAGING = "staging"; private static final String UNDERSCORE = "_"; private static final String SINGLE_QUOTE = "'"; - private static final String BATCH_ID_PATTERN = "{NEXT_BATCH_ID_PATTERN}"; + public static final String BATCH_ID_PATTERN = "{NEXT_BATCH_ID_PATTERN}"; public static final String BATCH_START_TS_PATTERN = "{BATCH_START_TIMESTAMP_PLACEHOLDER}"; private static final String BATCH_END_TS_PATTERN = "{BATCH_END_TIMESTAMP_PLACEHOLDER}"; @@ -161,18 +160,14 @@ public Set schemaEvolutionCapabilitySet() return Collections.emptySet(); } - @Default - public String bulkLoadBatchIdValue() - { - return UUID.randomUUID().toString(); - } - //---------- FIELDS ---------- public abstract IngestMode ingestMode(); public abstract RelationalSink relationalSink(); + public abstract Optional bulkLoadTaskIdValue(); + @Derived protected PlannerOptions plannerOptions() { @@ -182,6 +177,7 @@ protected PlannerOptions plannerOptions() .enableSchemaEvolution(enableSchemaEvolution()) .createStagingDataset(createStagingDataset()) .enableConcurrentSafety(enableConcurrentSafety()) + .bulkLoadTaskIdValue(bulkLoadTaskIdValue()) .build(); } @@ -494,7 +490,7 @@ private void init(Datasets datasets) .batchStartTimestampPattern(BATCH_START_TS_PATTERN) .batchEndTimestampPattern(BATCH_END_TS_PATTERN) .batchIdPattern(BATCH_ID_PATTERN) - .bulkLoadBatchIdValue(bulkLoadBatchIdValue()) + .bulkLoadTaskIdValue(bulkLoadTaskIdValue()) .build(); planner = Planners.get(enrichedDatasets, enrichedIngestMode, plannerOptions(), relationalSink().capabilities()); @@ -701,9 +697,9 @@ else if (lowerBound instanceof Number) private Optional getNextBatchId(Datasets datasets, Executor executor, Transformer transformer, IngestMode ingestMode) { - if (ingestMode.accept(IngestModeVisitors.IS_INGEST_MODE_TEMPORAL)) + if 
(ingestMode.accept(IngestModeVisitors.IS_INGEST_MODE_TEMPORAL) || ingestMode instanceof BulkLoad) { - LogicalPlan logicalPlanForNextBatchId = LogicalPlanFactory.getLogicalPlanForNextBatchId(datasets); + LogicalPlan logicalPlanForNextBatchId = LogicalPlanFactory.getLogicalPlanForNextBatchId(datasets, ingestMode); List tabularData = executor.executePhysicalPlanAndGetResults(transformer.generatePhysicalPlan(logicalPlanForNextBatchId)); Optional nextBatchId = Optional.ofNullable(tabularData.stream() .findFirst() diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java index 528fb07dec6..9b8ac1db944 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/main/java/org/finos/legend/engine/persistence/components/relational/h2/H2Sink.java @@ -77,6 +77,7 @@ import java.util.Map; import java.util.Set; +import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; public class H2Sink extends AnsiSqlSink @@ -226,6 +227,7 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + - 
"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" VARCHAR,\"append_time\" TIMESTAMP)"; + "(\"col_int\" INTEGER,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER,\"append_time\" TIMESTAMP)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\", \"append_time\") " + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + - "'xyz123','2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file1.csv'," + + "{NEXT_BATCH_ID_PATTERN},'2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file1.csv'," + "'col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -156,11 +156,11 @@ public void testBulkLoadWithDigestNotGeneratedAuditEnabled() throws Exception String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table1.csv"; - RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.empty()); executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); Map appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(0); - verifyBulkLoadMetadata(appendMetadata, filePath); + verifyBulkLoadMetadata(appendMetadata, filePath, 1, Optional.empty()); } @Test @@ -195,7 +195,7 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception .relationalSink(H2Sink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - 
.bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE_1) .build(); GeneratorResult operations = generator.generateOperations(datasets); @@ -205,12 +205,12 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + - "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" VARCHAR)"; + "(\"col_int\" INTEGER,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"batch_id\" INTEGER)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"batch_id\") " + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + - "'xyz123' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file2.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MAIN') FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file2.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -227,10 +227,10 @@ public void testBulkLoadWithDigestNotGeneratedAuditDisabled() throws Exception String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table2.csv"; - RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, 
Optional.of(TASK_ID_VALUE_1)); executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); Map appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(0); - verifyBulkLoadMetadata(appendMetadata, filePath); + verifyBulkLoadMetadata(appendMetadata, filePath, 1, Optional.of(TASK_ID_VALUE_1)); } @Test @@ -267,7 +267,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE_1) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -278,13 +278,13 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"main\"" + - "(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"digest\" VARCHAR,\"batch_id\" VARCHAR,\"append_time\" TIMESTAMP)"; + "(\"col_int\" INTEGER,\"col_string\" VARCHAR,\"col_decimal\" DECIMAL(5,2),\"col_datetime\" TIMESTAMP,\"digest\" VARCHAR,\"batch_id\" INTEGER,\"append_time\" TIMESTAMP)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"main\" " + "(\"col_int\", \"col_string\", \"col_decimal\", \"col_datetime\", \"digest\", \"batch_id\", \"append_time\") " + "SELECT CONVERT(\"col_int\",INTEGER),CONVERT(\"col_string\",VARCHAR),CONVERT(\"col_decimal\",DECIMAL(5,2)),CONVERT(\"col_datetime\",TIMESTAMP)," + "LAKEHOUSE_MD5(ARRAY['col_int','col_string','col_decimal','col_datetime'],ARRAY[\"col_int\",\"col_string\",\"col_decimal\",\"col_datetime\"])," + - "'xyz123','2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file3.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM 
bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MAIN'),'2000-01-01 00:00:00' FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file3.csv','col_int,col_string,col_decimal,col_datetime',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -302,10 +302,10 @@ public void testBulkLoadWithDigestGeneratedAuditEnabled() throws Exception String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table3.csv"; - RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.of(TASK_ID_VALUE_1)); executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); Map appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(0); - verifyBulkLoadMetadata(appendMetadata, filePath); + verifyBulkLoadMetadata(appendMetadata, filePath, 1, Optional.of(TASK_ID_VALUE_1)); } @Test @@ -342,7 +342,7 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) .collectStatistics(true) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE_1) .executionTimestampClock(fixedClock_2000_01_01) .caseConversion(CaseConversion.TO_UPPER) .build(); @@ -354,13 +354,13 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except Map statsSql = operations.postIngestStatisticsSql(); String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"TEST_DB\".\"TEST\".\"MAIN\"" + - "(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" DECIMAL(5,2),\"COL_DATETIME\" TIMESTAMP,\"DIGEST\" VARCHAR,\"BATCH_ID\" VARCHAR,\"APPEND_TIME\" TIMESTAMP)"; + 
"(\"COL_INT\" INTEGER,\"COL_STRING\" VARCHAR,\"COL_DECIMAL\" DECIMAL(5,2),\"COL_DATETIME\" TIMESTAMP,\"DIGEST\" VARCHAR,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" TIMESTAMP)"; String expectedIngestSql = "INSERT INTO \"TEST_DB\".\"TEST\".\"MAIN\" " + "(\"COL_INT\", \"COL_STRING\", \"COL_DECIMAL\", \"COL_DATETIME\", \"DIGEST\", \"BATCH_ID\", \"APPEND_TIME\") " + "SELECT CONVERT(\"COL_INT\",INTEGER),CONVERT(\"COL_STRING\",VARCHAR),CONVERT(\"COL_DECIMAL\",DECIMAL(5,2)),CONVERT(\"COL_DATETIME\",TIMESTAMP)," + "LAKEHOUSE_MD5(ARRAY['COL_INT','COL_STRING','COL_DECIMAL','COL_DATETIME'],ARRAY[\"COL_INT\",\"COL_STRING\",\"COL_DECIMAL\",\"COL_DATETIME\"])," + - "'xyz123','2000-01-01 00:00:00' " + + "(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MAIN'),'2000-01-01 00:00:00' " + "FROM CSVREAD('src/test/resources/data/bulk-load/input/staged_file4.csv','COL_INT,COL_STRING,COL_DECIMAL,COL_DATETIME',NULL)"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); @@ -379,10 +379,64 @@ public void testBulkLoadWithDigestGeneratedAuditEnabledUpperCase() throws Except String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table4.csv"; - RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER); + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.TO_UPPER, Optional.of(TASK_ID_VALUE_1)); executePlansAndVerifyForCaseConversion(ingestor, datasets, schema, expectedDataPath, expectedStats); Map appendMetadata = h2Sink.executeQuery("select * from BULK_LOAD_BATCH_METADATA").get(0); - verifyBulkLoadMetadataForUpperCase(appendMetadata, filePath); + verifyBulkLoadMetadataForUpperCase(appendMetadata, filePath, 1, Optional.of(TASK_ID_VALUE_1)); + } + + @Test + public void 
testBulkLoadWithDigestNotGeneratedAuditDisabledTwoBatches() throws Exception + { + String filePath = "src/test/resources/data/bulk-load/input/staged_file2.csv"; + + BulkLoad bulkLoad = BulkLoad.builder() + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .batchIdField(BATCH_ID) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) + .addAllFiles(Collections.singletonList(filePath)).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database(testDatabaseName).group(testSchemaName).name(mainTableName).alias("my_alias") + .schema(SchemaDefinition.builder().build()) + .build(); + + Datasets datasets = Datasets.of(mainDataset, stagedFilesDataset); + + + // Verify execution using ingestor (first batch) + PlannerOptions options = PlannerOptions.builder().collectStatistics(true).build(); + String[] schema = new String[]{COL_INT, COL_STRING, COL_DECIMAL, COL_DATETIME, BATCH_ID}; + + Map expectedStats = new HashMap<>(); + expectedStats.put(StatisticName.FILES_LOADED.name(), 1); + expectedStats.put(StatisticName.ROWS_WITH_ERRORS.name(), 0); + + String expectedDataPath = "src/test/resources/data/bulk-load/expected/expected_table2.csv"; + + RelationalIngestor ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.of(TASK_ID_VALUE_1)); + executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); + Map appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(0); + verifyBulkLoadMetadata(appendMetadata, filePath, 1, Optional.of(TASK_ID_VALUE_1)); + + + // Verify execution using ingestor (second batch) + expectedDataPath = 
"src/test/resources/data/bulk-load/expected/expected_table5.csv"; + + ingestor = getRelationalIngestor(bulkLoad, options, fixedClock_2000_01_01, CaseConversion.NONE, Optional.of(TASK_ID_VALUE_2)); + executePlansAndVerifyResults(ingestor, datasets, schema, expectedDataPath, expectedStats, false); + appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(0); + verifyBulkLoadMetadata(appendMetadata, filePath, 1, Optional.of(TASK_ID_VALUE_1)); + appendMetadata = h2Sink.executeQuery("select * from bulk_load_batch_metadata").get(1); + verifyBulkLoadMetadata(appendMetadata, filePath, 2, Optional.of(TASK_ID_VALUE_2)); } @Test @@ -445,7 +499,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() RelationalGenerator generator = RelationalGenerator.builder() .ingestMode(bulkLoad) .relationalSink(H2Sink.get()) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(TASK_ID_VALUE_1) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) .build(); @@ -459,6 +513,100 @@ public void testBulkLoadStagedFilesDatasetNotProvided() } } + @Test + public void testBulkLoadStageHasPrimaryKey() + { + try + { + Field pkCol = Field.builder() + .name("some_pk") + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .primaryKey(true) + .build(); + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) + .addAllFiles(Collections.singletonList("src/test/resources/data/bulk-load/input/staged_file1.csv")).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, pkCol)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") 
+ .schema(SchemaDefinition.builder().build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + Assertions.fail("Exception was not thrown"); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Primary key list must be empty")); + } + } + + @Test + public void testBulkLoadMainHasPrimaryKey() + { + try + { + Field pkCol = Field.builder() + .name("some_pk") + .type(FieldType.of(DataType.INT, Optional.empty(), Optional.empty())) + .primaryKey(true) + .build(); + + BulkLoad bulkLoad = BulkLoad.builder() + .batchIdField(BATCH_ID) + .digestGenStrategy(NoDigestGenStrategy.builder().build()) + .auditing(NoAuditing.builder().build()) + .build(); + + Dataset stagedFilesDataset = StagedFilesDataset.builder() + .stagedFilesDatasetProperties( + H2StagedFilesDatasetProperties.builder() + .fileFormat(FileFormat.CSV) + .addAllFiles(Collections.singletonList("src/test/resources/data/bulk-load/input/staged_file1.csv")).build()) + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4)).build()) + .build(); + + Dataset mainDataset = DatasetDefinition.builder() + .database("my_db").name("my_name").alias("my_alias") + .schema(SchemaDefinition.builder().addAllFields(Arrays.asList(col1, col2, col3, col4, pkCol)).build()) + .build(); + + RelationalGenerator generator = RelationalGenerator.builder() + .ingestMode(bulkLoad) + .relationalSink(H2Sink.get()) + .bulkLoadTaskIdValue(TASK_ID_VALUE_1) + .collectStatistics(true) + .executionTimestampClock(fixedClock_2000_01_01) + .build(); + + GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); + Assertions.fail("Exception 
was not thrown"); + } + catch (Exception e) + { + Assertions.assertTrue(e.getMessage().contains("Primary key list must be empty")); + } + } + @Test public void testBulkLoadMoreThanOneFile() { @@ -499,7 +647,7 @@ public void testBulkLoadNotCsvFile() } } - RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion) + RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions options, Clock executionTimestampClock, CaseConversion caseConversion, Optional taskId) { return RelationalIngestor.builder() .ingestMode(ingestMode) @@ -507,30 +655,46 @@ RelationalIngestor getRelationalIngestor(IngestMode ingestMode, PlannerOptions o .executionTimestampClock(executionTimestampClock) .cleanupStagingData(options.cleanupStagingData()) .collectStatistics(options.collectStatistics()) - .bulkLoadBatchIdValue(BATCH_ID_VALUE) + .bulkLoadTaskIdValue(taskId) .enableConcurrentSafety(true) .caseConversion(caseConversion) .build(); } - private void verifyBulkLoadMetadata(Map appendMetadata, String fileName) + private void verifyBulkLoadMetadata(Map appendMetadata, String fileName, int batchId, Optional taskId) { - Assertions.assertEquals("xyz123", appendMetadata.get("batch_id")); + Assertions.assertEquals(batchId, appendMetadata.get("batch_id")); Assertions.assertEquals("SUCCEEDED", appendMetadata.get("batch_status")); Assertions.assertEquals("main", appendMetadata.get("table_name")); - Assertions.assertEquals(String.format("{\"files\":[\"%s\"]}", fileName), appendMetadata.get("batch_source_info")); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_start_ts_utc").toString()); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("batch_end_ts_utc").toString()); + Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"files\":[\"%s\"]", fileName))); + if (taskId.isPresent()) + { + 
Assertions.assertTrue(appendMetadata.get("batch_source_info").toString().contains(String.format("\"task_id\":\"%s\"", taskId.get()))); + } + else + { + Assertions.assertFalse(appendMetadata.get("batch_source_info").toString().contains("\"task_id\"")); + } } - private void verifyBulkLoadMetadataForUpperCase(Map appendMetadata, String fileName) + private void verifyBulkLoadMetadataForUpperCase(Map appendMetadata, String fileName, int batchId, Optional taskId) { - Assertions.assertEquals("xyz123", appendMetadata.get("BATCH_ID")); + Assertions.assertEquals(batchId, appendMetadata.get("BATCH_ID")); Assertions.assertEquals("SUCCEEDED", appendMetadata.get("BATCH_STATUS")); Assertions.assertEquals("MAIN", appendMetadata.get("TABLE_NAME")); - Assertions.assertEquals(String.format("{\"files\":[\"%s\"]}", fileName), appendMetadata.get("BATCH_SOURCE_INFO")); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("BATCH_START_TS_UTC").toString()); Assertions.assertEquals("2000-01-01 00:00:00.0", appendMetadata.get("BATCH_END_TS_UTC").toString()); + Assertions.assertTrue(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains(String.format("\"files\":[\"%s\"]", fileName))); + if (taskId.isPresent()) + { + Assertions.assertTrue(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains(String.format("\"task_id\":\"%s\"", taskId.get()))); + } + else + { + Assertions.assertFalse(appendMetadata.get("BATCH_SOURCE_INFO").toString().contains("\"task_id\"")); + } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv index b68e9aa646b..022020ba331 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table1.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,xyz123,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,xyz123,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,1,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,1,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,1,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv index c807b1c4764..92b02b8f19c 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table2.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,xyz123 -2,Bella,99.99,2022-01-12 00:00:00.0,xyz123 -49,Sandy,123.45,2022-01-13 00:00:00.0,xyz123 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,1 +2,Bella,99.99,2022-01-12 00:00:00.0,1 +49,Sandy,123.45,2022-01-13 00:00:00.0,1 \ No newline at end of file diff --git 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv index c6774c43774..b9421520b4a 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table3.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,9fc62c73317227ab0760aed72f4fee17,xyz123,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,b0383f1a479eb2a6c5186f045af4c51f,xyz123,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0,dc170980c8540e2a667753e793dad94c,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,9fc62c73317227ab0760aed72f4fee17,1,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,b0383f1a479eb2a6c5186f045af4c51f,1,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,dc170980c8540e2a667753e793dad94c,1,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv index 7888259500d..0b162ed75bd 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table4.csv @@ -1,3 +1,3 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,e7dc92b208f2244b9ece45d706474f55,xyz123,2000-01-01 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0,278cf3ee2c2981bb8aeade81cc21e87a,xyz123,2000-01-01 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0,e8ff35a6699515eaca0a798a7f989978,xyz123,2000-01-01 00:00:00.0 \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,e7dc92b208f2244b9ece45d706474f55,1,2000-01-01 00:00:00.0 +2,Bella,99.99,2022-01-12 00:00:00.0,278cf3ee2c2981bb8aeade81cc21e87a,1,2000-01-01 00:00:00.0 +49,Sandy,123.45,2022-01-13 00:00:00.0,e8ff35a6699515eaca0a798a7f989978,1,2000-01-01 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv index 7d90d71c952..a20715af7c5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/expected/expected_table5.csv @@ -1,3 +1,6 @@ -1,Andy,5.20,2022-01-11 00:00:00.0,4B39799C7A1FB5EFC4BC328966A159E0,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv 
-2,Bella,99.99,2022-01-12 00:00:00.0,58467B440BCED7607369DC8A260B0607,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv -49,Sandy,123.45,2022-01-13 00:00:00.0,29B8C8A6CD28B069290372E6B54B6C72,2000-01-01 00:00:00.0,src/test/resources/data/bulk-load/input/staged_file5.csv \ No newline at end of file +1,Andy,5.20,2022-01-11 00:00:00.0,1 +2,Bella,99.99,2022-01-12 00:00:00.0,1 +49,Sandy,123.45,2022-01-13 00:00:00.0,1 +1,Andy,5.20,2022-01-11 00:00:00.0,2 +2,Bella,99.99,2022-01-12 00:00:00.0,2 +49,Sandy,123.45,2022-01-13 00:00:00.0,2 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv deleted file mode 100644 index dd2941bedb8..00000000000 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-h2/src/test/resources/data/bulk-load/input/staged_file5.csv +++ /dev/null @@ -1,3 +0,0 @@ -1,Andy,5.20,2022-01-11 00:00:00.0 -2,Bella,99.99,2022-01-12 00:00:00.0 -49,Sandy,123.45,2022-01-13 00:00:00.0 \ No newline at end of file diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java index ea78819d696..46465426e97 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/main/java/org/finos/legend/engine/persistence/components/relational/snowflake/SnowflakeSink.java @@ -85,6 +85,7 @@ import java.util.Objects; import java.util.ArrayList; +import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_ID_PATTERN; import static org.finos.legend.engine.persistence.components.relational.api.RelationalIngestorAbstract.BATCH_START_TS_PATTERN; public class SnowflakeSink extends AnsiSqlSink @@ -264,7 +265,8 @@ public IngestorResult performBulkLoad(Datasets datasets, Executor metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER,\"batch_id\" VARCHAR,\"append_time\" DATETIME)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER,\"col_integer\" INTEGER,\"batch_id\" INTEGER,\"append_time\" DATETIME)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + "(\"col_int\", \"col_integer\", \"batch_id\", \"append_time\") " + "FROM " + - "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\",'batch123','2000-01-01 00:00:00' " + + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\",{NEXT_BATCH_ID},'2000-01-01 00:00:00' " + "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage)" + " on_error = 'ABORT_STATEMENT'"; 
String expectedMetadataIngestSql = "INSERT INTO bulk_load_batch_metadata (\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + - "(SELECT 'batch123','my_name','2000-01-01 00:00:00',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + "(SELECT {NEXT_BATCH_ID},'my_name','2000-01-01 00:00:00',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"],\"task_id\":\"task123\"}'))"; Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); @@ -163,7 +161,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() .ingestMode(bulkLoad) .relationalSink(SnowflakeSink.get()) .collectStatistics(true) - .bulkLoadBatchIdValue("batch123") + .bulkLoadTaskIdValue("task123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -172,11 +170,11 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() List ingestSql = operations.ingestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT,\"col_variant\" VARIANT,\"batch_id\" VARCHAR)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_bigint\" BIGINT,\"col_variant\" VARIANT,\"batch_id\" INTEGER)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + "(\"col_bigint\", \"col_variant\", \"batch_id\") " + "FROM " + - "(SELECT t.$4 as \"col_bigint\",TO_VARIANT(PARSE_JSON(t.$5)) as \"col_variant\",'batch123' " + + "(SELECT t.$4 as \"col_bigint\",TO_VARIANT(PARSE_JSON(t.$5)) as \"col_variant\",(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as 
bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME') " + "FROM my_location (FILE_FORMAT => 'my_file_format', PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as t) " + "on_error = 'ABORT_STATEMENT'"; @@ -189,7 +187,7 @@ public void testBulkLoadWithDigestNotGeneratedColumnNumbersProvided() } @Test - public void testBulkLoadWithUpperCaseConversionAndDefaultBatchId() + public void testBulkLoadWithUpperCaseConversionAndNoTaskId() { BulkLoad bulkLoad = BulkLoad.builder() .batchIdField("batch_id") @@ -223,31 +221,28 @@ public void testBulkLoadWithUpperCaseConversionAndDefaultBatchId() List preActionsSql = operations.preActionsSql(); List ingestSql = operations.ingestSql(); + List metadataIngestSql = operations.metadataIngestSql(); Map statsSql = operations.postIngestStatisticsSql(); - // Extract the generated UUID - Pattern pattern = Pattern.compile("[a-f0-9]{8}(?:-[a-f0-9]{4}){4}[a-f0-9]{8}"); - Matcher matcher = pattern.matcher(ingestSql.get(0)); - String uuid = ""; - if (matcher.find()) - { - uuid = matcher.group(); - } - - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER NOT NULL PRIMARY KEY," + - "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" VARCHAR,\"APPEND_TIME\" DATETIME)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"MY_DB\".\"MY_NAME\"(\"COL_INT\" INTEGER," + + "\"COL_INTEGER\" INTEGER,\"DIGEST\" VARCHAR,\"BATCH_ID\" INTEGER,\"APPEND_TIME\" DATETIME)"; String expectedIngestSql = "COPY INTO \"MY_DB\".\"MY_NAME\" " + "(\"COL_INT\", \"COL_INTEGER\", \"DIGEST\", \"BATCH_ID\", \"APPEND_TIME\") " + "FROM " + "(SELECT legend_persistence_stage.$1 as \"COL_INT\",legend_persistence_stage.$2 as \"COL_INTEGER\"," + "LAKEHOUSE_MD5(OBJECT_CONSTRUCT('COL_INT',legend_persistence_stage.$1,'COL_INTEGER',legend_persistence_stage.$2))," + - "'%s','2000-01-01 00:00:00' " + + "(SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM 
BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME'),'2000-01-01 00:00:00' " + "FROM my_location (FILE_FORMAT => 'my_file_format', " + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; + String expectedMetadataIngestSql = "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + + "(SELECT (SELECT COALESCE(MAX(BULK_LOAD_BATCH_METADATA.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as BULK_LOAD_BATCH_METADATA WHERE UPPER(BULK_LOAD_BATCH_METADATA.\"TABLE_NAME\") = 'MY_NAME')," + + "'MY_NAME','2000-01-01 00:00:00',SYSDATE(),'{BULK_LOAD_BATCH_STATUS_PLACEHOLDER}',PARSE_JSON('{\"files\":[\"/path/xyz/file1.csv\",\"/path/xyz/file2.csv\"]}'))"; + Assertions.assertEquals(expectedCreateTableSql, preActionsSql.get(0)); - Assertions.assertEquals(String.format(expectedIngestSql, uuid), ingestSql.get(0)); + Assertions.assertEquals(expectedIngestSql, ingestSql.get(0)); + Assertions.assertEquals(expectedMetadataIngestSql, metadataIngestSql.get(0)); Assertions.assertEquals("SELECT 0 as \"ROWSDELETED\"", statsSql.get(ROWS_DELETED)); Assertions.assertEquals("SELECT 0 as \"ROWSTERMINATED\"", statsSql.get(ROWS_TERMINATED)); @@ -317,7 +312,7 @@ public void testBulkLoadStagedFilesDatasetNotProvided() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue("batch123") + .bulkLoadTaskIdValue("batch123") .build(); GeneratorResult operations = generator.generateOperations(Datasets.of(mainDataset, stagingDataset)); @@ -357,7 +352,7 @@ public void testBulkLoadWithDigest() .relationalSink(SnowflakeSink.get()) .collectStatistics(true) .executionTimestampClock(fixedClock_2000_01_01) - .bulkLoadBatchIdValue("batch123") + .bulkLoadTaskIdValue("task123") .build(); GeneratorResult 
operations = generator.generateOperations(Datasets.of(mainDataset, stagedFilesDataset)); @@ -366,14 +361,14 @@ public void testBulkLoadWithDigest() List ingestSql = operations.ingestSql(); Map statsSql = operations.postIngestStatisticsSql(); - String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER NOT NULL PRIMARY KEY,\"col_integer\" INTEGER,\"digest\" VARCHAR,\"batch_id\" VARCHAR,\"append_time\" DATETIME)"; + String expectedCreateTableSql = "CREATE TABLE IF NOT EXISTS \"my_db\".\"my_name\"(\"col_int\" INTEGER,\"col_integer\" INTEGER,\"digest\" VARCHAR,\"batch_id\" INTEGER,\"append_time\" DATETIME)"; String expectedIngestSql = "COPY INTO \"my_db\".\"my_name\" " + "(\"col_int\", \"col_integer\", \"digest\", \"batch_id\", \"append_time\") " + "FROM " + "(SELECT legend_persistence_stage.$1 as \"col_int\",legend_persistence_stage.$2 as \"col_integer\"," + "LAKEHOUSE_UDF(OBJECT_CONSTRUCT('col_int',legend_persistence_stage.$1,'col_integer',legend_persistence_stage.$2))," + - "'batch123','2000-01-01 00:00:00' " + + "(SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'MY_NAME'),'2000-01-01 00:00:00' " + "FROM my_location (FILE_FORMAT => 'my_file_format', " + "PATTERN => '(/path/xyz/file1.csv)|(/path/xyz/file2.csv)') as legend_persistence_stage) " + "on_error = 'ABORT_STATEMENT'"; diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java index 8ad9c6351ef..4a5a9dd4992 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-snowflake/src/test/java/org/finos/legend/engine/persistence/components/util/BulkLoadDatasetUtilsSnowflakeTest.java @@ -24,13 +24,13 @@ public String getExpectedSqlForMetadata() { return "INSERT INTO bulk_load_batch_metadata " + "(\"batch_id\", \"table_name\", \"batch_start_ts_utc\", \"batch_end_ts_utc\", \"batch_status\", \"batch_source_info\") " + - "(SELECT 'batch_id_123','appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"batch_id\"),0)+1 FROM bulk_load_batch_metadata as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"table_name\") = 'APPENG_LOG_TABLE_NAME'),'appeng_log_table_name','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; } public String getExpectedSqlForMetadataUpperCase() { return "INSERT INTO BULK_LOAD_BATCH_METADATA (\"BATCH_ID\", \"TABLE_NAME\", \"BATCH_START_TS_UTC\", \"BATCH_END_TS_UTC\", \"BATCH_STATUS\", \"BATCH_SOURCE_INFO\") " + - "(SELECT 'batch_id_123','BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; + "(SELECT (SELECT COALESCE(MAX(bulk_load_batch_metadata.\"BATCH_ID\"),0)+1 FROM BULK_LOAD_BATCH_METADATA as bulk_load_batch_metadata WHERE UPPER(bulk_load_batch_metadata.\"TABLE_NAME\") = 'BULK_LOAD_TABLE_NAME'),'BULK_LOAD_TABLE_NAME','2000-01-01 00:00:00',SYSDATE(),'',PARSE_JSON('my_lineage_value'))"; } public RelationalSink getRelationalSink() From f6119121262241d709f280932fa4cf7a55c4b6e3 Mon Sep 17 00:00:00 2001 From: prasar-ashutosh Date: Thu, 19 Oct 2023 11:40:11 +0800 Subject: [PATCH 57/57] Address Code 
Review Comments --- .../components/common/FileFormat.java | 20 +---- .../bigquery/executor/BigQueryExecutor.java | 11 +-- .../bigquery/executor/BigQueryHelper.java | 73 +++++++++++-------- .../StagedFilesDatasetReferenceVisitor.java | 2 +- 4 files changed, 51 insertions(+), 55 deletions(-) diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java index 3d5f556970a..75cf32a3a55 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-logical-plan/src/main/java/org/finos/legend/engine/persistence/components/common/FileFormat.java @@ -16,20 +16,8 @@ public enum FileFormat { - CSV("CSV"), - JSON("JSON"), - AVRO("AVRO"), - PARQUET("PARQUET"); - - String name; - - FileFormat(String name) - { - this.name = name; - } - - public String getName() - { - return this.name; - } + CSV, + JSON, + AVRO, + PARQUET; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java index d2da804ec33..046d2088a44 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryExecutor.java @@ -75,15 +75,12 @@ public Map executeLoadPhysicalPlanAndGetStats(SqlPlan phy { List sqlList = physicalPlan.getSqlList(); - // The first SQL is a load statement - // Executed in a new transaction + // Load statement (Not supported in Bigquery to run in a transaction) Map loadStats = bigQueryHelper.executeLoadStatement(getEnrichedSql(placeholderKeyValues, sqlList.get(0))); - // The second SQL is an insert statement - // We need to first close the current transaction (if it exists) and open a new transaction - // Such that the result of the Load will be available to the Insert - bigQueryHelper.close(); - bigQueryHelper.executeStatement(getEnrichedSql(placeholderKeyValues, sqlList.get(1))); + // Isolation level of Bigquery is Snapshot, + // So Insert statement has to run in a new transaction so that it can see the changes of Load + bigQueryHelper.executeStatementInANewTransaction(getEnrichedSql(placeholderKeyValues, sqlList.get(1))); return loadStats; } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java index 1336c32eda2..261226baa04 100644 --- 
a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/executor/BigQueryHelper.java @@ -327,6 +327,12 @@ public void executeStatement(String sql) executeStatements(sqls); } + public void executeStatementInANewTransaction(String sql) + { + List sqls = Collections.singletonList(sql); + executeStatementsInANewTransaction(sqls); + } + // Execute statements in a transaction - either use an existing one or use a new one public void executeStatements(List sqls) { @@ -346,45 +352,50 @@ public void executeStatements(List sqls) } else { - BigQueryTransactionManager txManager = null; - try + executeStatementsInANewTransaction(sqls); + } + } + + public void executeStatementsInANewTransaction(List sqls) + { + BigQueryTransactionManager txManager = null; + try + { + txManager = new BigQueryTransactionManager(bigQuery); + txManager.beginTransaction(); + for (String sql : sqls) { - txManager = new BigQueryTransactionManager(bigQuery); - txManager.beginTransaction(); - for (String sql : sqls) - { - txManager.executeInCurrentTransaction(sql); - } - txManager.commitTransaction(); + txManager.executeInCurrentTransaction(sql); } - catch (Exception e) + txManager.commitTransaction(); + } + catch (Exception e) + { + LOGGER.error("Error executing SQL statements: " + sqls, e); + if (txManager != null) { - LOGGER.error("Error executing SQL statements: " + sqls, e); - if (txManager != null) + try { - try - { - txManager.revertTransaction(); - } - catch (InterruptedException e2) - { - throw new RuntimeException(e2); - } + txManager.revertTransaction(); + } + catch (InterruptedException e2) + { + 
throw new RuntimeException(e2); } - throw new RuntimeException(e); } - finally + throw new RuntimeException(e); + } + finally + { + if (txManager != null) { - if (txManager != null) + try { - try - { - txManager.close(); - } - catch (InterruptedException e) - { - LOGGER.error("Error closing transaction manager.", e); - } + txManager.close(); + } + catch (InterruptedException e) + { + LOGGER.error("Error closing transaction manager.", e); } } } diff --git a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java index 82f7045a630..0c5e7d91bc5 100644 --- a/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java +++ b/legend-engine-xts-persistence/legend-engine-xt-persistence-component/legend-engine-xt-persistence-component-relational-bigquery/src/main/java/org/finos/legend/engine/persistence/components/relational/bigquery/sql/visitor/StagedFilesDatasetReferenceVisitor.java @@ -40,7 +40,7 @@ public VisitorResult visit(PhysicalPlanNode prev, StagedFilesDatasetReference cu Map loadOptionsMap = new HashMap<>(); FileFormat fileFormat = datasetProperties.fileFormat(); - loadOptionsMap.put("format", fileFormat.getName()); + loadOptionsMap.put("format", fileFormat.name()); datasetProperties.loadOptions().ifPresent(options -> retrieveLoadOptions(fileFormat, options, loadOptionsMap)); StagedFilesTable 
stagedFilesTable = new StagedFilesTable(datasetProperties.files(), loadOptionsMap);