diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java b/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java index 963dbf188..4c8c1d4bc 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java @@ -418,18 +418,31 @@ private static ParquetBufferValue getStructValue( DataValidationUtil.validateAndParseIcebergStruct(path, value, insertRowsCurrIndex); Set extraFields = new HashSet<>(structVal.keySet()); List listVal = new ArrayList<>(type.getFieldCount()); + float estimatedParquetSize = 0f; for (int i = 0; i < type.getFieldCount(); i++) { + StringBuilder sb = new StringBuilder(); + String fieldName = type.getFieldName(i); + for (int j = 0; j < fieldName.length(); j++) { + if (fieldName.charAt(j) == '_') { + sb.append((char) Integer.parseInt(fieldName.substring(j + 2, j + 4), 16)); + j += 3; + } else { + sb.append(fieldName.charAt(j)); + } + } + String originalFieldName = sb.substring(0, sb.toString().lastIndexOf('_')); + ParquetBufferValue parsedValue = parseColumnValueToParquet( - structVal.getOrDefault(type.getFieldName(i), null), + structVal.getOrDefault(originalFieldName, null), type.getType(i), statsMap, defaultTimezone, insertRowsCurrIndex, path, isDescendantsOfRepeatingGroup); - extraFields.remove(type.getFieldName(i)); + extraFields.remove(originalFieldName); listVal.add(parsedValue.getValue()); estimatedParquetSize += parsedValue.getSize(); } diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java index 5e3fa1191..782c84647 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java @@ -86,7 +86,9 @@ public class ParquetRowBuffer extends AbstractRowBuffer { public void setupSchema(List columns) { fieldIndex.clear(); metadata.clear(); - metadata.put("sfVer", "1,1"); + if (!clientBufferParameters.getIsIcebergMode()) { + metadata.put("sfVer", "1,1"); + } List parquetTypes = new ArrayList<>(); int id = 1; diff --git a/src/main/java/net/snowflake/ingest/utils/IcebergDataTypeParser.java b/src/main/java/net/snowflake/ingest/utils/IcebergDataTypeParser.java index abb03cdef..f663fa108 100644 --- a/src/main/java/net/snowflake/ingest/utils/IcebergDataTypeParser.java +++ b/src/main/java/net/snowflake/ingest/utils/IcebergDataTypeParser.java @@ -154,7 +154,7 @@ public static Type getTypeFromJson(@Nonnull JsonNode jsonNode) { field.isObject(), "Cannot parse struct field from non-object: %s", field); int id = JsonUtil.getInt(ID, field); - String name = JsonUtil.getString(NAME, field); + String name = (JsonUtil.getString(NAME, field) + "_" + id).replace("_", "_x5F"); Type type = getTypeFromJson(field.get(TYPE)); String doc = JsonUtil.getStringOrNull(DOC, field); diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergNumericTypesIT.java b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergNumericTypesIT.java index e4b0783d4..81f4558e0 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergNumericTypesIT.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergNumericTypesIT.java @@ -1,10 +1,18 @@ +/* + * Copyright (c) 2024 Snowflake Computing Inc. All rights reserved. + */ + package net.snowflake.ingest.streaming.internal.datatypes; import java.math.BigDecimal; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import java.util.Random; import net.snowflake.ingest.utils.Constants; import net.snowflake.ingest.utils.ErrorCode; import net.snowflake.ingest.utils.SFException; +import org.apache.commons.lang3.RandomStringUtils; import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Ignore; @@ -26,6 +34,8 @@ public static Object[][] parameters() { @Parameterized.Parameter(1) public static Constants.IcebergSerializationPolicy icebergSerializationPolicy; + static final Random generator = new Random(0x5EED); + @Before public void before() throws Exception { super.beforeIceberg(compressionAlgorithm, icebergSerializationPolicy); @@ -306,6 +316,7 @@ public void testDecimal() throws Exception { testIcebergIngestion("decimal(3, 1)", 12.5f, new FloatProvider()); testIcebergIngestion("decimal(3, 1)", -99, new IntProvider()); testIcebergIngestion("decimal(38, 0)", Long.MAX_VALUE, new LongProvider()); + testIcebergIngestion("decimal(21, 0)", .0, new DoubleProvider()); testIcebergIngestion("decimal(38, 10)", null, new BigDecimalProvider()); testIcebergIngestion( @@ -368,5 +379,48 @@ public void testDecimalAndQueries() throws Exception { Arrays.asList(new BigDecimal("-12.3"), new BigDecimal("-12.3"), null), "select COUNT({columnName}) from {tableName} where {columnName} = -12.3", Arrays.asList(2L)); + + List bigDecimals_9_4 = randomBigDecimal(200, 9, 4); + testIcebergIngestAndQuery( + "decimal(9, 4)", bigDecimals_9_4, "select {columnName} from {tableName}", bigDecimals_9_4); + + List bigDecimals_18_9 = randomBigDecimal(200, 18, 9); + testIcebergIngestAndQuery( + "decimal(18, 9)", + bigDecimals_18_9, + "select {columnName} from {tableName}", + bigDecimals_18_9); + + List bigDecimals_21_0 = randomBigDecimal(200, 21, 0); + testIcebergIngestAndQuery( + "decimal(21, 0)", + bigDecimals_21_0, + "select {columnName} from {tableName}", + bigDecimals_21_0); + + List bigDecimals_38_10 = randomBigDecimal(200, 38, 10); + testIcebergIngestAndQuery( + "decimal(38, 10)", + bigDecimals_38_10, + "select {columnName} from {tableName}", + bigDecimals_38_10); + } + + private static List randomBigDecimal(int count, int precision, int scale) { + List list = new ArrayList<>(); + for (int i = 0; i < count; i++) { + int intPart = generator.nextInt(precision - scale + 1); + int floatPart = generator.nextInt(scale + 1); + if (intPart == 0 && floatPart == 0) { + list.add(null); + continue; + } + list.add( + new BigDecimal( + RandomStringUtils.randomNumeric(intPart) + + "." + + RandomStringUtils.randomNumeric(floatPart))); + } + return list; } }