[Do not merge] TMP #866

Closed
wants to merge 2 commits into from
@@ -418,18 +418,31 @@ private static ParquetBufferValue getStructValue(
        DataValidationUtil.validateAndParseIcebergStruct(path, value, insertRowsCurrIndex);
    Set<String> extraFields = new HashSet<>(structVal.keySet());
    List<Object> listVal = new ArrayList<>(type.getFieldCount());

    float estimatedParquetSize = 0f;
    for (int i = 0; i < type.getFieldCount(); i++) {
      StringBuilder sb = new StringBuilder();
Collaborator
Doing this StringBuilder business on the hot path will make performance even worse.

Have two suggestions:

Option 1:

  • When parsing the schema, in IcebergDataTypeParser, set the struct field's name to the field id.
  • When validating the input row, in IcebergParquetValueParser (over here, next to this comment): (i) switch the logic to loop over the structVal key set instead of type's fields, (ii) for each key, do a lookup to get the field id (you'll have to pipe through a lookup map and maintain it somewhere too), (iii) use this fieldId as the "field name" when calling type.getType. Note that type.getType has an overload that takes field names.

Option 2:

  • When parsing the schema, in IcebergDataTypeParser, set the struct's field name to getEscapedString(fieldName) and let AvroSchemaUtil do its thing on whatever we pass in.
  • When validating the input row, in IcebergParquetValueParser, (i) switch the logic to loop over the structVal key set instead of type's fields, (ii) for each key, call type.getFieldName(getEscapedString(key))
  • getEscapedString(String str) should be implemented as: return str.replace("_", "_x" + Integer.toHexString('_').toUpperCase())
    This, combined with AvroSchemaUtil's internal behavior of escaping every non-digit, non-letter, non-underscore character, will make sure we are able to generate safe, non-colliding names. tl;dr: with this fix we're making sure everything except digits and letters gets escaped properly, effectively fixing AvroSchemaUtil's bug.
    In both cases, switching what we iterate on will need to be accompanied by fixes to how the extraFields collection's bookkeeping is done.

I prefer option 1 because it completely sidesteps AvroSchemaUtil's buggy behavior. But I'm not sure if there is an easy way to maintain this per-struct-type map somewhere and handle all nested struct situations where there's a series of structs embedded inside each other. Still putting it out here in case you can come up with something.
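The one-liner in Option 2 can be sketched as a standalone helper (this is a sketch of the suggestion above, not merged code; note that `Integer.toHexString` takes an int, so the underscore must be a char literal):

```java
public class EscapeSketch {
    // '_' is code point 0x5F, so every underscore becomes the literal "_x5F".
    // Combined with escaping every other non-letter, non-digit character,
    // distinct input names can no longer collide after sanitization.
    static String getEscapedString(String str) {
        return str.replace("_", "_x" + Integer.toHexString('_').toUpperCase());
    }

    public static void main(String[] args) {
        System.out.println(getEscapedString("my_field")); // my_x5Ffield
    }
}
```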

Contributor Author
Sounds good, will try option 1
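Sketched roughly, the per-struct lookup that Option 1 calls for might look like this (all names here are hypothetical illustrations, not code from this PR):

```java
import java.util.HashMap;
import java.util.Map;

public class Option1Sketch {
    // Hypothetical map built while parsing the schema:
    // original field name -> Iceberg field id.
    private final Map<String, Integer> fieldIds = new HashMap<>();

    void register(String originalName, int fieldId) {
        fieldIds.put(originalName, fieldId);
    }

    // During row validation, each input key resolves to its field id, and the
    // id (as a string) doubles as the Parquet field name, so AvroSchemaUtil's
    // name mangling never gets a chance to cause collisions.
    String resolve(String inputKey) {
        Integer id = fieldIds.get(inputKey);
        if (id == null) {
            // in the real code this would feed the extraFields bookkeeping instead
            throw new IllegalArgumentException("Extra field: " + inputKey);
        }
        return String.valueOf(id);
    }

    public static void main(String[] args) {
        Option1Sketch struct = new Option1Sketch();
        struct.register("my_field", 3);
        System.out.println(struct.resolve("my_field")); // 3
    }
}
```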

      String fieldName = type.getFieldName(i);
      for (int j = 0; j < fieldName.length(); j++) {
        if (fieldName.charAt(j) == '_') {
          sb.append((char) Integer.parseInt(fieldName.substring(j + 2, j + 4), 16));
          j += 3;
        } else {
          sb.append(fieldName.charAt(j));
        }
      }
      String originalFieldName = sb.substring(0, sb.toString().lastIndexOf('_'));
Contributor Author
We need this for extra field validation.


      ParquetBufferValue parsedValue =
          parseColumnValueToParquet(
              structVal.getOrDefault(type.getFieldName(i), null),
              structVal.getOrDefault(originalFieldName, null),
              type.getType(i),
              statsMap,
              defaultTimezone,
              insertRowsCurrIndex,
              path,
              isDescendantsOfRepeatingGroup);
      extraFields.remove(type.getFieldName(i));
      extraFields.remove(originalFieldName);
      listVal.add(parsedValue.getValue());
      estimatedParquetSize += parsedValue.getSize();
    }
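For reference, the encode/decode pair in this change (encoding in IcebergDataTypeParser, decoding in the loop above) can be exercised standalone like this (class and method names are illustrative only):

```java
public class FieldNameCodec {
    // Mirrors the PR's encoding: append "_" + fieldId, then hex-escape every
    // underscore (including the separator) as "_x5F".
    static String encode(String name, int id) {
        return (name + "_" + id).replace("_", "_x5F");
    }

    // Mirrors the decode loop: each "_xHH" becomes the character with hex code
    // HH, then everything from the last '_' (the "_<id>" suffix) is dropped.
    static String decode(String encoded) {
        StringBuilder sb = new StringBuilder();
        for (int j = 0; j < encoded.length(); j++) {
            if (encoded.charAt(j) == '_') {
                sb.append((char) Integer.parseInt(encoded.substring(j + 2, j + 4), 16));
                j += 3; // skip over "xHH"
            } else {
                sb.append(encoded.charAt(j));
            }
        }
        return sb.substring(0, sb.lastIndexOf("_"));
    }

    public static void main(String[] args) {
        System.out.println(encode("my_field", 7));         // my_x5Ffield_x5F7
        System.out.println(decode(encode("my_field", 7))); // my_field
    }
}
```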
@@ -86,7 +86,9 @@ public class ParquetRowBuffer extends AbstractRowBuffer<ParquetChunkData> {
  public void setupSchema(List<ColumnMetadata> columns) {
    fieldIndex.clear();
    metadata.clear();
    metadata.put("sfVer", "1,1");
    if (!clientBufferParameters.getIsIcebergMode()) {
      metadata.put("sfVer", "1,1");
    }
    List<Type> parquetTypes = new ArrayList<>();
    int id = 1;

@@ -154,7 +154,7 @@ public static Type getTypeFromJson(@Nonnull JsonNode jsonNode) {
field.isObject(), "Cannot parse struct field from non-object: %s", field);

int id = JsonUtil.getInt(ID, field);
String name = JsonUtil.getString(NAME, field);
String name = (JsonUtil.getString(NAME, field) + "_" + id).replace("_", "_x5F");
Contributor Author
Encode every "_" as its hex form; same idea as the escape suggestion above.

Type type = getTypeFromJson(field.get(TYPE));

String doc = JsonUtil.getStringOrNull(DOC, field);
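The bug this change works around can be seen with a toy version of the sanitization: because '_' itself is allowed through unescaped, an escaped character and a literal escape sequence become indistinguishable. (The sanitize rule below is a simplification for illustration, not AvroSchemaUtil's actual implementation.)

```java
public class CollisionDemo {
    // Simplified stand-in for the sanitization discussed above: letters,
    // digits, and underscores pass through; everything else becomes
    // "_x" + the character's hex code.
    static String sanitize(String name) {
        StringBuilder sb = new StringBuilder();
        for (char c : name.toCharArray()) {
            if (Character.isLetterOrDigit(c) || c == '_') {
                sb.append(c);
            } else {
                sb.append("_x").append(Integer.toHexString(c).toUpperCase());
            }
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        // '.' escapes to "_x2E", but a literal "_x2E" passes through untouched:
        // two distinct input names, one sanitized form.
        System.out.println(sanitize("col.name"));    // col_x2Ename
        System.out.println(sanitize("col_x2Ename")); // col_x2Ename
    }
}
```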
@@ -1,10 +1,18 @@
/*
* Copyright (c) 2024 Snowflake Computing Inc. All rights reserved.
*/

package net.snowflake.ingest.streaming.internal.datatypes;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import net.snowflake.ingest.utils.Constants;
import net.snowflake.ingest.utils.ErrorCode;
import net.snowflake.ingest.utils.SFException;
import org.apache.commons.lang3.RandomStringUtils;
import org.assertj.core.api.Assertions;
import org.junit.Before;
import org.junit.Ignore;
@@ -26,6 +34,8 @@ public static Object[][] parameters() {
@Parameterized.Parameter(1)
public static Constants.IcebergSerializationPolicy icebergSerializationPolicy;

static final Random generator = new Random(0x5EED);

@Before
public void before() throws Exception {
super.beforeIceberg(compressionAlgorithm, icebergSerializationPolicy);
@@ -306,6 +316,7 @@ public void testDecimal() throws Exception {
testIcebergIngestion("decimal(3, 1)", 12.5f, new FloatProvider());
testIcebergIngestion("decimal(3, 1)", -99, new IntProvider());
testIcebergIngestion("decimal(38, 0)", Long.MAX_VALUE, new LongProvider());
testIcebergIngestion("decimal(21, 0)", .0, new DoubleProvider());
testIcebergIngestion("decimal(38, 10)", null, new BigDecimalProvider());

testIcebergIngestion(
@@ -368,5 +379,48 @@ public void testDecimalAndQueries() throws Exception {
Arrays.asList(new BigDecimal("-12.3"), new BigDecimal("-12.3"), null),
"select COUNT({columnName}) from {tableName} where {columnName} = -12.3",
Arrays.asList(2L));

List<Object> bigDecimals_9_4 = randomBigDecimal(200, 9, 4);
testIcebergIngestAndQuery(
"decimal(9, 4)", bigDecimals_9_4, "select {columnName} from {tableName}", bigDecimals_9_4);

List<Object> bigDecimals_18_9 = randomBigDecimal(200, 18, 9);
testIcebergIngestAndQuery(
"decimal(18, 9)",
bigDecimals_18_9,
"select {columnName} from {tableName}",
bigDecimals_18_9);

List<Object> bigDecimals_21_0 = randomBigDecimal(200, 21, 0);
testIcebergIngestAndQuery(
"decimal(21, 0)",
bigDecimals_21_0,
"select {columnName} from {tableName}",
bigDecimals_21_0);

List<Object> bigDecimals_38_10 = randomBigDecimal(200, 38, 10);
testIcebergIngestAndQuery(
"decimal(38, 10)",
bigDecimals_38_10,
"select {columnName} from {tableName}",
bigDecimals_38_10);
}

  private static List<Object> randomBigDecimal(int count, int precision, int scale) {
    List<Object> list = new ArrayList<>();
    for (int i = 0; i < count; i++) {
      int intPart = generator.nextInt(precision - scale + 1);
      int floatPart = generator.nextInt(scale + 1);
      if (intPart == 0 && floatPart == 0) {
        list.add(null);
        continue;
      }
      list.add(
          new BigDecimal(
              RandomStringUtils.randomNumeric(intPart)
                  + "."
                  + RandomStringUtils.randomNumeric(floatPart)));
    }
    return list;
  }
}