generated from finos/software-project-blueprint
-
Notifications
You must be signed in to change notification settings - Fork 237
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Persistence Library: Dry run mode support for Bulk Load and Capture Data Errors (#2642)
Co-authored-by: kumuwu <[email protected]>
- Loading branch information
1 parent
8e08a15
commit 20de9ff
Showing
116 changed files
with
4,132 additions
and
653 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
108 changes: 108 additions & 0 deletions
108
...d/engine/persistence/components/ingestmode/versioning/DeriveDataErrorRowsLogicalPlan.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
// Copyright 2024 Goldman Sachs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package org.finos.legend.engine.persistence.components.ingestmode.versioning; | ||
|
||
import org.finos.legend.engine.persistence.components.logicalplan.LogicalPlan; | ||
import org.finos.legend.engine.persistence.components.logicalplan.conditions.GreaterThan; | ||
import org.finos.legend.engine.persistence.components.logicalplan.datasets.Dataset; | ||
import org.finos.legend.engine.persistence.components.logicalplan.datasets.Selection; | ||
import org.finos.legend.engine.persistence.components.logicalplan.values.*; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class DeriveDataErrorRowsLogicalPlan implements VersioningStrategyVisitor<LogicalPlan> | ||
{ | ||
private List<String> primaryKeys; | ||
private List<String> remainingColumns; | ||
private Dataset tempStagingDataset; | ||
private int sampleRowCount; | ||
|
||
public static final String DATA_VERSION_ERROR_COUNT = "legend_persistence_error_count"; | ||
|
||
public DeriveDataErrorRowsLogicalPlan(List<String> primaryKeys, List<String> remainingColumns, Dataset tempStagingDataset, int sampleRowCount) | ||
{ | ||
this.primaryKeys = primaryKeys; | ||
this.remainingColumns = remainingColumns; | ||
this.tempStagingDataset = tempStagingDataset; | ||
this.sampleRowCount = sampleRowCount; | ||
} | ||
|
||
@Override | ||
public LogicalPlan visitNoVersioningStrategy(NoVersioningStrategyAbstract noVersioningStrategy) | ||
{ | ||
return null; | ||
} | ||
|
||
@Override | ||
public LogicalPlan visitMaxVersionStrategy(MaxVersionStrategyAbstract maxVersionStrategy) | ||
{ | ||
if (maxVersionStrategy.performStageVersioning()) | ||
{ | ||
return getLogicalPlanForDataErrors(maxVersionStrategy.versioningField()); | ||
} | ||
else | ||
{ | ||
return null; | ||
} | ||
} | ||
|
||
@Override | ||
public LogicalPlan visitAllVersionsStrategy(AllVersionsStrategyAbstract allVersionsStrategyAbstract) | ||
{ | ||
if (allVersionsStrategyAbstract.performStageVersioning()) | ||
{ | ||
return getLogicalPlanForDataErrors(allVersionsStrategyAbstract.versioningField()); | ||
} | ||
else | ||
{ | ||
return null; | ||
} | ||
} | ||
|
||
private LogicalPlan getLogicalPlanForDataErrors(String versionField) | ||
{ | ||
List<Value> pKsAndVersion = new ArrayList<>(); | ||
for (String pk: primaryKeys) | ||
{ | ||
pKsAndVersion.add(FieldValue.builder().fieldName(pk).build()); | ||
} | ||
pKsAndVersion.add(FieldValue.builder().fieldName(versionField).build()); | ||
|
||
List<Value> distinctValueFields = new ArrayList<>(); | ||
for (String field: remainingColumns) | ||
{ | ||
distinctValueFields.add(FieldValue.builder().fieldName(field).build()); | ||
} | ||
|
||
FunctionImpl countDistinct = FunctionImpl.builder() | ||
.functionName(FunctionName.COUNT) | ||
.addValue(FunctionImpl.builder().functionName(FunctionName.DISTINCT).addAllValue(distinctValueFields).build()) | ||
.alias(DATA_VERSION_ERROR_COUNT) | ||
.build(); | ||
|
||
Selection selectDataError = Selection.builder() | ||
.source(tempStagingDataset) | ||
.groupByFields(pKsAndVersion) | ||
.addAllFields(pKsAndVersion) | ||
.addFields(countDistinct) | ||
.havingCondition(GreaterThan.of(FieldValue.builder().fieldName(DATA_VERSION_ERROR_COUNT).build(), ObjectValue.of(1))) | ||
.limit(sampleRowCount) | ||
.build(); | ||
|
||
return LogicalPlan.builder().addOps(selectDataError).build(); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
33 changes: 33 additions & 0 deletions
33
...egend/engine/persistence/components/logicalplan/values/MetadataFileNameFieldAbstract.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Copyright 2024 Goldman Sachs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package org.finos.legend.engine.persistence.components.logicalplan.values; | ||
|
||
import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; | ||
|
||
import static org.immutables.value.Value.Immutable; | ||
import static org.immutables.value.Value.Style; | ||
|
||
@Immutable | ||
@Style( | ||
typeAbstract = "*Abstract", | ||
typeImmutable = "*", | ||
jdkOnly = true, | ||
optionalAcceptNullable = true, | ||
strictBuilder = true | ||
) | ||
public interface MetadataFileNameFieldAbstract extends Value | ||
{ | ||
StagedFilesDatasetProperties stagedFilesDatasetProperties(); | ||
} |
33 changes: 33 additions & 0 deletions
33
...gend/engine/persistence/components/logicalplan/values/MetadataRowNumberFieldAbstract.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Copyright 2024 Goldman Sachs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package org.finos.legend.engine.persistence.components.logicalplan.values; | ||
|
||
import org.finos.legend.engine.persistence.components.logicalplan.datasets.StagedFilesDatasetProperties; | ||
|
||
import static org.immutables.value.Value.Immutable; | ||
import static org.immutables.value.Value.Style; | ||
|
||
@Immutable | ||
@Style( | ||
typeAbstract = "*Abstract", | ||
typeImmutable = "*", | ||
jdkOnly = true, | ||
optionalAcceptNullable = true, | ||
strictBuilder = true | ||
) | ||
public interface MetadataRowNumberFieldAbstract extends Value | ||
{ | ||
StagedFilesDatasetProperties stagedFilesDatasetProperties(); | ||
} |
38 changes: 38 additions & 0 deletions
38
...inos/legend/engine/persistence/components/logicalplan/values/TryCastFunctionAbstract.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// Copyright 2024 Goldman Sachs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package org.finos.legend.engine.persistence.components.logicalplan.values; | ||
|
||
import org.finos.legend.engine.persistence.components.logicalplan.datasets.FieldType; | ||
|
||
import static org.immutables.value.Value.Immutable; | ||
import static org.immutables.value.Value.Parameter; | ||
import static org.immutables.value.Value.Style; | ||
|
||
@Immutable | ||
@Style( | ||
typeAbstract = "*Abstract", | ||
typeImmutable = "*", | ||
jdkOnly = true, | ||
optionalAcceptNullable = true, | ||
strictBuilder = true | ||
) | ||
public interface TryCastFunctionAbstract extends Value | ||
{ | ||
@Parameter(order = 0) | ||
Value field(); | ||
|
||
@Parameter(order = 1) | ||
FieldType type(); | ||
} |
Oops, something went wrong.