Skip to content

Commit

Permalink
[HUDI-7432] Fix excessive object creation in KeyGenUtils (#10721)
Browse files Browse the repository at this point in the history
Co-authored-by: Vova Kolmakov <[email protected]>
  • Loading branch information
wombatu-kun and Vova Kolmakov authored Feb 22, 2024
1 parent 397c057 commit 5c444ff
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -146,21 +146,24 @@ public static String[] extractRecordKeysByFields(String recordKey, List<String>
public static String getRecordKey(GenericRecord record, List<String> recordKeyFields, boolean consistentLogicalTimestampEnabled) {
boolean keyIsNullEmpty = true;
StringBuilder recordKey = new StringBuilder();
for (String recordKeyField : recordKeyFields) {
for (int i = 0; i < recordKeyFields.size(); i++) {
String recordKeyField = recordKeyFields.get(i);
String recordKeyValue = HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyField, true, consistentLogicalTimestampEnabled);
if (recordKeyValue == null) {
recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE + NULL_RECORDKEY_PLACEHOLDER + DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(NULL_RECORDKEY_PLACEHOLDER);
} else if (recordKeyValue.isEmpty()) {
recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE + EMPTY_RECORDKEY_PLACEHOLDER + DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(EMPTY_RECORDKEY_PLACEHOLDER);
} else {
recordKey.append(recordKeyField + DEFAULT_COMPOSITE_KEY_FILED_VALUE + recordKeyValue + DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
recordKey.append(recordKeyField).append(DEFAULT_COMPOSITE_KEY_FILED_VALUE).append(recordKeyValue);
keyIsNullEmpty = false;
}
if (i != recordKeyFields.size() - 1) {
recordKey.append(DEFAULT_RECORD_KEY_PARTS_SEPARATOR);
}
}
recordKey.deleteCharAt(recordKey.length() - 1);
if (keyIsNullEmpty) {
throw new HoodieKeyException("recordKey values: \"" + recordKey + "\" for fields: "
+ recordKeyFields.toString() + " cannot be entirely null or empty.");
+ recordKeyFields + " cannot be entirely null or empty.");
}
return recordKey.toString();
}
Expand All @@ -172,20 +175,27 @@ public static String getRecordPartitionPath(GenericRecord record, List<String> p
}

StringBuilder partitionPath = new StringBuilder();
for (String partitionPathField : partitionPathFields) {
for (int i = 0; i < partitionPathFields.size(); i++) {
String partitionPathField = partitionPathFields.get(i);
String fieldVal = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true, consistentLogicalTimestampEnabled);
if (fieldVal == null || fieldVal.isEmpty()) {
partitionPath.append(hiveStylePartitioning ? partitionPathField + "=" + HUDI_DEFAULT_PARTITION_PATH
: HUDI_DEFAULT_PARTITION_PATH);
if (hiveStylePartitioning) {
partitionPath.append(partitionPathField).append("=");
}
partitionPath.append(HUDI_DEFAULT_PARTITION_PATH);
} else {
if (encodePartitionPath) {
fieldVal = PartitionPathEncodeUtils.escapePathName(fieldVal);
}
partitionPath.append(hiveStylePartitioning ? partitionPathField + "=" + fieldVal : fieldVal);
if (hiveStylePartitioning) {
partitionPath.append(partitionPathField).append("=");
}
partitionPath.append(fieldVal);
}
if (i != partitionPathFields.size() - 1) {
partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
partitionPath.deleteCharAt(partitionPath.length() - 1);
return partitionPath.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public void testNullPartitionPathFields() {
@Test
public void testNullRecordKeyFields() {
GenericRecord record = getRecord();
Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> {
Assertions.assertThrows(HoodieKeyException.class, () -> {
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getPropertiesWithoutRecordKeyProp());
keyGenerator.getRecordKey(record);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ private TypedProperties getProps() {
@Test
public void testNullRecordKeyFields() {
GenericRecord record = getRecord();
Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> {
Assertions.assertThrows(HoodieKeyException.class, () -> {
BaseKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getPropertiesWithoutRecordKeyProp());
keyGenerator.getRecordKey(record);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ private TypedProperties getWrongRecordKeyFieldProps() {
@Test
public void testNullRecordKeyFields() {
GenericRecord record = getRecord();
Assertions.assertThrows(StringIndexOutOfBoundsException.class, () -> {
Assertions.assertThrows(HoodieKeyException.class, () -> {
BaseKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(getPropertiesWithoutRecordKeyProp());
keyGenerator.getRecordKey(record);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport {
}

// Record's key field not specified
assertThrows(classOf[StringIndexOutOfBoundsException]) {
assertThrows(classOf[HoodieKeyException]) {
val props = new TypedProperties()
props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField")
val keyGen = new ComplexKeyGenerator(props)
Expand Down Expand Up @@ -494,7 +494,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport {
val props = new TypedProperties()
props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField")

assertThrows(classOf[StringIndexOutOfBoundsException]) {
assertThrows(classOf[HoodieKeyException]) {
new GlobalDeleteKeyGenerator(props).getRecordKey(baseRecord)
}
}
Expand Down

0 comments on commit 5c444ff

Please sign in to comment.