Commit
detach create and merge node logic to services
1 parent 7ff97e7 · commit 665dbb8
Showing 7 changed files with 221 additions and 187 deletions.
157 changes: 157 additions & 0 deletions
.../kafka/connector/internal/streaming/schemaevolution/iceberg/IcebergColumnTreeFactory.java
@@ -0,0 +1,157 @@
package com.snowflake.kafka.connector.internal.streaming.schemaevolution.iceberg;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;

public class IcebergColumnTreeFactory {
  private final IcebergColumnTypeMapper mapper;

  public IcebergColumnTreeFactory() {
    this.mapper = new IcebergColumnTypeMapper();
  }

  IcebergColumnTree fromIcebergSchema(IcebergColumnSchema columnSchema) {
    IcebergFieldNode rootNode =
        createNode(columnSchema.getColumnName().toUpperCase(), columnSchema.getSchema());
    return new IcebergColumnTree(rootNode);
  }

  IcebergColumnTree fromJson(IcebergColumnJsonValuePair pair) {
    IcebergFieldNode rootNode = createNode(pair.getColumnName().toUpperCase(), pair.getJsonNode());
    return new IcebergColumnTree(rootNode);
  }

  IcebergColumnTree fromConnectSchema(Field kafkaConnectField) {
    IcebergFieldNode rootNode =
        createNode(kafkaConnectField.name().toUpperCase(), kafkaConnectField.schema());
    return new IcebergColumnTree(rootNode);
  }

  // -- parse tree from Iceberg schema logic --
  private IcebergFieldNode createNode(String name, Type apacheIcebergSchema) {
    String snowflakeType = mapper.mapToColumnTypeFromIcebergSchema(apacheIcebergSchema);
    return new IcebergFieldNode(name, snowflakeType, produceChildren(apacheIcebergSchema));
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildren(Type apacheIcebergSchema) {
    // primitives have no children
    if (apacheIcebergSchema.isPrimitiveType()) {
      return new LinkedHashMap<>();
    }
    Type.NestedType nestedField = apacheIcebergSchema.asNestedType();
    return nestedField.fields().stream()
        .collect(
            Collectors.toMap(
                Types.NestedField::name,
                field -> createNode(field.name(), field.type()),
                // duplicate field names cannot occur in an Iceberg schema
                (v1, v2) -> {
                  throw new IllegalArgumentException("Duplicate keys: " + v1);
                },
                LinkedHashMap::new));
  }

  // -- parse tree from kafka record payload logic --
  private IcebergFieldNode createNode(String name, JsonNode jsonNode) {
    String snowflakeType = mapper.mapToColumnTypeFromJson(jsonNode);
    return new IcebergFieldNode(name, snowflakeType, produceChildren(jsonNode));
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildren(JsonNode recordNode) {
    if (recordNode.isNull()) {
      return new LinkedHashMap<>();
    }
    if (recordNode.isArray()) {
      ArrayNode arrayNode = (ArrayNode) recordNode;
      return produceChildrenFromArray(arrayNode);
    }
    if (recordNode.isObject()) {
      ObjectNode objectNode = (ObjectNode) recordNode;
      return produceChildrenFromObject(objectNode);
    }
    return new LinkedHashMap<>();
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildrenFromArray(ArrayNode arrayNode) {
    JsonNode arrayElement = arrayNode.get(0);
    // VARCHAR is set for an empty array: [] -> ARRAY(VARCHAR)
    // 16777216 is Snowflake's maximum VARCHAR length
    if (arrayElement == null) {
      LinkedHashMap<String, IcebergFieldNode> child = new LinkedHashMap<>();
      child.put(
          "element", new IcebergFieldNode("element", "VARCHAR(16777216)", new LinkedHashMap<>()));
      return child;
    }
    LinkedHashMap<String, IcebergFieldNode> child = new LinkedHashMap<>();
    child.put("element", createNode("element", arrayElement));
    return child;
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildrenFromObject(ObjectNode objectNode) {
    return objectNode.properties().stream()
        .collect(
            Collectors.toMap(
                Map.Entry::getKey,
                stringJsonNodeEntry ->
                    createNode(stringJsonNodeEntry.getKey(), stringJsonNodeEntry.getValue()),
                (v1, v2) -> {
                  throw new IllegalArgumentException("Duplicate keys: " + v1);
                },
                LinkedHashMap::new));
  }

  // -- parse tree from kafka record schema logic --
  private IcebergFieldNode createNode(String name, Schema schema) {
    String snowflakeType =
        mapper.mapToColumnTypeFromKafkaSchema(schema.schema().type(), schema.schema().name());
    return new IcebergFieldNode(name, snowflakeType, produceChildren(schema.schema()));
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildren(Schema connectSchema) {
    if (connectSchema.type() == Schema.Type.STRUCT) {
      return produceChildrenFromStruct(connectSchema);
    }
    if (connectSchema.type() == Schema.Type.MAP) {
      return produceChildrenFromMap(connectSchema);
    }
    if (connectSchema.type() == Schema.Type.ARRAY) {
      return produceChildrenForArray(connectSchema);
    }
    // remaining types are primitive and have no children
    return new LinkedHashMap<>();
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildrenForArray(
      Schema connectSchemaForArray) {
    LinkedHashMap<String, IcebergFieldNode> child = new LinkedHashMap<>();
    child.put("element", createNode("element", connectSchemaForArray.valueSchema()));
    return child;
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildrenFromStruct(Schema connectSchema) {
    return connectSchema.fields().stream()
        .collect(
            Collectors.toMap(
                Field::name,
                f -> createNode(f.name(), f.schema()),
                (v1, v2) -> {
                  throw new IllegalArgumentException("Duplicate keys: " + v1);
                },
                LinkedHashMap::new));
  }

  private LinkedHashMap<String, IcebergFieldNode> produceChildrenFromMap(Schema connectSchema) {
    LinkedHashMap<String, IcebergFieldNode> keyValue = new LinkedHashMap<>();
    // these names will not be used when creating a query
    keyValue.put("key", createNode("key", connectSchema.keySchema()));
    keyValue.put("value", createNode("value", connectSchema.valueSchema()));
    return keyValue;
  }
}
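
A minimal usage sketch of the three factory entry points, for orientation; it is not part of this commit. The factory methods are package-private, so this assumes a caller in the same package, and the IcebergColumnSchema and IcebergColumnJsonValuePair constructors shown are assumptions — only their getters are visible in this diff.

// Hypothetical sketch; constructors marked "assumed" are not shown in this commit.
IcebergColumnTreeFactory factory = new IcebergColumnTreeFactory();

// 1. From an Iceberg schema, e.g. a column read from an existing table.
Type icebergType = Types.StructType.of(
    Types.NestedField.required(1, "city", Types.StringType.get()));
IcebergColumnTree fromIceberg =
    factory.fromIcebergSchema(new IcebergColumnSchema(icebergType, "address")); // assumed ctor

// 2. From a raw record payload, when the record carries no Connect schema.
// (readTree throws a checked JsonProcessingException; handle it in real code.)
JsonNode json = new ObjectMapper().readTree("{\"city\": \"Warsaw\"}");
IcebergColumnTree fromJson =
    factory.fromJson(new IcebergColumnJsonValuePair("address", json)); // assumed ctor

// 3. From a Kafka Connect field on the record's schema.
Field field = new Field(
    "address", 0, SchemaBuilder.struct().field("city", Schema.STRING_SCHEMA).build());
IcebergColumnTree fromConnect = factory.fromConnectSchema(field);

In all three cases the column name is uppercased for the root node, so trees built for the same column from different sources compare by the same name.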
39 changes: 39 additions & 0 deletions
...e/kafka/connector/internal/streaming/schemaevolution/iceberg/IcebergColumnTreeMerger.java
@@ -0,0 +1,39 @@
package com.snowflake.kafka.connector.internal.streaming.schemaevolution.iceberg;

import com.google.common.base.Preconditions;
import com.snowflake.kafka.connector.internal.KCLogger;

public class IcebergColumnTreeMerger {

  private final KCLogger LOGGER = new KCLogger(IcebergColumnTreeMerger.class.getName());

  /**
   * Designed for unstructured data types: enhances an already existing unstructured column with
   * new subfields.
   */
  void merge(IcebergColumnTree currentTree, IcebergColumnTree treeWithNewType) {
    validate(currentTree, treeWithNewType);
    LOGGER.debug("Attempting to apply changes for column: " + currentTree.getColumnName());

    merge(currentTree.getRootNode(), treeWithNewType.getRootNode());
  }

  /** Adds new children to a node; existing children are merged recursively, nothing else changes. */
  private void merge(IcebergFieldNode currentNode, IcebergFieldNode nodeToMerge) {
    nodeToMerge.children.forEach(
        (key, node) -> {
          IcebergFieldNode currentNodesChild = currentNode.children.get(key);
          if (currentNodesChild == null) {
            currentNode.children.put(key, node);
          } else {
            merge(currentNodesChild, node);
          }
        });
  }

  private void validate(IcebergColumnTree currentTree, IcebergColumnTree treeWithNewType) {
    Preconditions.checkArgument(
        currentTree.getColumnName().equals(treeWithNewType.getColumnName()),
        "Error merging column schemas. Tried to merge schemas for two different columns");
  }
}
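
A sketch of the merge semantics under the same assumptions as the factory sketch above (not part of this commit). Merging a tree built from {"city": ...} with one built from {"zip": ...} for the same column keeps the existing "city" child and attaches "zip" as a new subfield; children present in both trees are merged recursively rather than replaced.

// Hypothetical sketch; the IcebergColumnJsonValuePair constructor is assumed.
ObjectMapper om = new ObjectMapper();
IcebergColumnTreeFactory factory = new IcebergColumnTreeFactory();
IcebergColumnTreeMerger merger = new IcebergColumnTreeMerger();

IcebergColumnTree currentTree = factory.fromJson(
    new IcebergColumnJsonValuePair("address", om.readTree("{\"city\": \"Warsaw\"}")));
IcebergColumnTree treeWithNewType = factory.fromJson(
    new IcebergColumnJsonValuePair("address", om.readTree("{\"zip\": \"00-001\"}")));

// Adds a "zip" child under the ADDRESS root; the existing "city" child is untouched.
// merge() would throw via validate() if the two trees named different columns.
merger.merge(currentTree, treeWithNewType);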