From eeb56c00f1e4e83e35bdcb51a87e6feb931b6928 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 7 Dec 2022 20:26:05 +0000 Subject: [PATCH 01/29] fix issue --- .../net/snowflake/ingest/connection/RequestBuilder.java | 8 +++++--- .../snowflake/ingest/streaming/internal/FlushService.java | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/connection/RequestBuilder.java b/src/main/java/net/snowflake/ingest/connection/RequestBuilder.java index 2b438a8dd..b428d23eb 100644 --- a/src/main/java/net/snowflake/ingest/connection/RequestBuilder.java +++ b/src/main/java/net/snowflake/ingest/connection/RequestBuilder.java @@ -313,7 +313,7 @@ private static Properties loadProperties() { return properties; } - LOGGER.debug("Loaded project version " + properties.getProperty("version")); + LOGGER.info("Loaded project version " + properties.getProperty("version")); return properties; } @@ -345,9 +345,11 @@ private static String getDefaultUserAgent() { // Add Java Version final String javaVersion = System.getProperty("java.version"); - defaultUserAgent.append(JAVA_USER_AGENT + "/" + javaVersion); + defaultUserAgent.append(JAVA_USER_AGENT + "/").append(javaVersion); + String userAgent = defaultUserAgent.toString(); - return defaultUserAgent.toString(); + LOGGER.info("Default user agent " + userAgent); + return userAgent; } private static String buildCustomUserAgent(String additionalUserAgentInfo) { diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index a6a80f275..7d75a34d3 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -141,7 +141,7 @@ List>> getData() { this.isNeedFlush = false; this.lastFlushTime = System.currentTimeMillis(); this.isTestMode = isTestMode; - this.latencyTimerContextMap = new HashMap<>(); + this.latencyTimerContextMap = new ConcurrentHashMap<>(); this.bdecVersion = this.owningClient.getParameterProvider().getBlobFormatVersion(); createWorkers(); } From 911c63fc2cfc6bea1ebcc595e297537da76ba6c8 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Thu, 16 Feb 2023 22:57:55 -0800 Subject: [PATCH 02/29] encryption --- .../streaming/internal/AbstractRowBuffer.java | 15 +-- .../streaming/internal/FlushService.java | 91 +++++++++---------- 2 files changed, 47 insertions(+), 59 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java index 973b6e81b..5c7140fcb 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java @@ -5,25 +5,14 @@ package net.snowflake.ingest.streaming.internal; import java.time.ZoneId; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; import java.util.stream.Collectors; import net.snowflake.ingest.streaming.InsertValidationResponse; import net.snowflake.ingest.streaming.OpenChannelRequest; -import net.snowflake.ingest.utils.Constants; -import net.snowflake.ingest.utils.ErrorCode; -import 
net.snowflake.ingest.utils.Logging; -import net.snowflake.ingest.utils.Pair; -import net.snowflake.ingest.utils.SFException; -import net.snowflake.ingest.utils.Utils; +import net.snowflake.ingest.utils.*; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.VisibleForTesting; diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index b4b318a8c..022e85c38 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -4,11 +4,7 @@ package net.snowflake.ingest.streaming.internal; -import static net.snowflake.ingest.utils.Constants.BLOB_EXTENSION_TYPE; -import static net.snowflake.ingest.utils.Constants.DISABLE_BACKGROUND_FLUSH; -import static net.snowflake.ingest.utils.Constants.MAX_BLOB_SIZE_IN_BYTES; -import static net.snowflake.ingest.utils.Constants.MAX_THREAD_COUNT; -import static net.snowflake.ingest.utils.Constants.THREAD_SHUTDOWN_TIMEOUT_IN_SEC; +import static net.snowflake.ingest.utils.Constants.*; import static net.snowflake.ingest.utils.Utils.getStackTrace; import com.codahale.metrics.Timer; @@ -17,35 +13,15 @@ import java.security.InvalidAlgorithmParameterException; import java.security.InvalidKeyException; import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TimeZone; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; +import java.util.*; +import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; import javax.crypto.BadPaddingException; import javax.crypto.IllegalBlockSizeException; import javax.crypto.NoSuchPaddingException; import net.snowflake.client.jdbc.SnowflakeSQLException; import net.snowflake.client.jdbc.internal.google.common.util.concurrent.ThreadFactoryBuilder; -import net.snowflake.ingest.utils.Constants; -import net.snowflake.ingest.utils.ErrorCode; -import net.snowflake.ingest.utils.Logging; -import net.snowflake.ingest.utils.Pair; -import net.snowflake.ingest.utils.SFException; -import net.snowflake.ingest.utils.Utils; +import net.snowflake.ingest.utils.*; import org.apache.arrow.util.VisibleForTesting; import org.apache.arrow.vector.VectorSchemaRoot; @@ -341,30 +317,53 @@ void distributeFlushTasks() { while (itr.hasNext()) { List>> blobData = new ArrayList<>(); - AtomicReference totalBufferSizeInBytes = new AtomicReference<>((float) 0); - + List> leftoverChannelsDataPerTable = new ArrayList<>(); + float totalBufferSizeInBytes = 0F; final String filePath = getFilePath(this.targetStage.getClientPrefix()); // Distribute work at table level, create a new blob if reaching the blob size limit - while (itr.hasNext() && totalBufferSizeInBytes.get() <= MAX_BLOB_SIZE_IN_BYTES) { - ConcurrentHashMap> table = - itr.next().getValue(); + while (itr.hasNext() || !leftoverChannelsDataPerTable.isEmpty()) { List> channelsDataPerTable = Collections.synchronizedList(new 
ArrayList<>()); - // Use parallel stream since getData could be the performance bottleneck when we have a high - // number of channels - table.values().parallelStream() - .forEach( - channel -> { - if (channel.isValid()) { - ChannelData data = channel.getData(filePath); - if (data != null) { - channelsDataPerTable.add(data); - totalBufferSizeInBytes.updateAndGet(v -> v + data.getBufferSize()); + if (!leftoverChannelsDataPerTable.isEmpty()) { + channelsDataPerTable.addAll(leftoverChannelsDataPerTable); + leftoverChannelsDataPerTable.clear(); + } else { + ConcurrentHashMap> table = + itr.next().getValue(); + // Use parallel stream since getData could be the performance bottleneck when we have a + // high number of channels + table.values().parallelStream() + .forEach( + channel -> { + if (channel.isValid()) { + ChannelData data = channel.getData(filePath); + if (data != null) { + channelsDataPerTable.add(data); + } } - } - }); + }); + } + if (!channelsDataPerTable.isEmpty()) { - blobData.add(channelsDataPerTable); + int idx = 0; + while (idx < channelsDataPerTable.size()) { + ChannelData channelData = channelsDataPerTable.get(idx); + totalBufferSizeInBytes += channelData.getBufferSize(); + if (totalBufferSizeInBytes > MAX_BLOB_SIZE_IN_BYTES + || (idx > 0 + && !Objects.equals( + channelData.getChannelContext().getEncryptionKeyId(), + channelsDataPerTable + .get(idx - 1) + .getChannelContext() + .getEncryptionKeyId()))) { + leftoverChannelsDataPerTable.addAll( + channelsDataPerTable.subList(idx + 1, channelsDataPerTable.size())); + break; + } + idx++; + } + blobData.add(channelsDataPerTable.subList(0, idx)); } } From b0a33d9d1d1b0ed903aae6da988e7c13783e6054 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Thu, 16 Feb 2023 23:01:06 -0800 Subject: [PATCH 03/29] fix format --- .../streaming/internal/AbstractRowBuffer.java | 15 +++++++-- .../streaming/internal/FlushService.java | 32 ++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java index 5c7140fcb..973b6e81b 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java @@ -5,14 +5,25 @@ package net.snowflake.ingest.streaming.internal; import java.time.ZoneId; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; import java.util.stream.Collectors; import net.snowflake.ingest.streaming.InsertValidationResponse; import net.snowflake.ingest.streaming.OpenChannelRequest; -import net.snowflake.ingest.utils.*; +import net.snowflake.ingest.utils.Constants; +import net.snowflake.ingest.utils.ErrorCode; +import net.snowflake.ingest.utils.Logging; +import net.snowflake.ingest.utils.Pair; +import net.snowflake.ingest.utils.SFException; +import net.snowflake.ingest.utils.Utils; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.VisibleForTesting; diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 022e85c38..6262dd95e 100644 --- 
a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -4,7 +4,11 @@ package net.snowflake.ingest.streaming.internal; -import static net.snowflake.ingest.utils.Constants.*; +import static net.snowflake.ingest.utils.Constants.BLOB_EXTENSION_TYPE; +import static net.snowflake.ingest.utils.Constants.DISABLE_BACKGROUND_FLUSH; +import static net.snowflake.ingest.utils.Constants.MAX_BLOB_SIZE_IN_BYTES; +import static net.snowflake.ingest.utils.Constants.MAX_THREAD_COUNT; +import static net.snowflake.ingest.utils.Constants.THREAD_SHUTDOWN_TIMEOUT_IN_SEC; import static net.snowflake.ingest.utils.Utils.getStackTrace; import com.codahale.metrics.Timer; @@ -13,15 +17,35 @@ import java.security.InvalidAlgorithmParameterException; import java.security.InvalidKeyException; import java.security.NoSuchAlgorithmException; -import java.util.*; -import java.util.concurrent.*; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TimeZone; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import javax.crypto.BadPaddingException; import javax.crypto.IllegalBlockSizeException; import javax.crypto.NoSuchPaddingException; import net.snowflake.client.jdbc.SnowflakeSQLException; import net.snowflake.client.jdbc.internal.google.common.util.concurrent.ThreadFactoryBuilder; -import net.snowflake.ingest.utils.*; +import net.snowflake.ingest.utils.Constants; +import net.snowflake.ingest.utils.ErrorCode; +import net.snowflake.ingest.utils.Logging; +import net.snowflake.ingest.utils.Pair; +import net.snowflake.ingest.utils.SFException; +import net.snowflake.ingest.utils.Utils; import org.apache.arrow.util.VisibleForTesting; import org.apache.arrow.vector.VectorSchemaRoot; From ff14184daee68112ffeeda40bf7caa2eb963f605 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 22 Feb 2023 15:30:30 -0800 Subject: [PATCH 04/29] add tests --- .../streaming/internal/FlushService.java | 32 ++++++++--- .../streaming/internal/FlushServiceTest.java | 55 ++++++++++++++++++- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 6262dd95e..7758e30e6 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -338,14 +338,15 @@ void distributeFlushTasks() { String, ConcurrentHashMap>>> itr = this.channelCache.iterator(); List, CompletableFuture>> blobs = new ArrayList<>(); + List> leftoverChannelsDataPerTable = new ArrayList<>(); - while (itr.hasNext()) { + while (itr.hasNext() || !leftoverChannelsDataPerTable.isEmpty()) { List>> blobData = new ArrayList<>(); - List> leftoverChannelsDataPerTable = new ArrayList<>(); float totalBufferSizeInBytes = 0F; final String filePath = getFilePath(this.targetStage.getClientPrefix()); - // Distribute work 
at table level, create a new blob if reaching the blob size limit + // Distribute work at table level, split the blob if reaching the blob size limit or the + // channel has different encryption key ids while (itr.hasNext() || !leftoverChannelsDataPerTable.isEmpty()) { List> channelsDataPerTable = Collections.synchronizedList(new ArrayList<>()); if (!leftoverChannelsDataPerTable.isEmpty()) { @@ -372,22 +373,37 @@ void distributeFlushTasks() { int idx = 0; while (idx < channelsDataPerTable.size()) { ChannelData channelData = channelsDataPerTable.get(idx); - totalBufferSizeInBytes += channelData.getBufferSize(); - if (totalBufferSizeInBytes > MAX_BLOB_SIZE_IN_BYTES - || (idx > 0 - && !Objects.equals( + // Stop processing the rest of channels if reaching the blob size limit or the channel + // has different encryption key ids + if (idx > 0 + && (totalBufferSizeInBytes + channelData.getBufferSize() > MAX_BLOB_SIZE_IN_BYTES + || !Objects.equals( channelData.getChannelContext().getEncryptionKeyId(), channelsDataPerTable .get(idx - 1) .getChannelContext() .getEncryptionKeyId()))) { leftoverChannelsDataPerTable.addAll( - channelsDataPerTable.subList(idx + 1, channelsDataPerTable.size())); + channelsDataPerTable.subList(idx, channelsDataPerTable.size())); + logger.logInfo( + "Creation of another blob is needed because of blob size limit or different" + + " encryption ids, client={}, table={}, size={}, encryptionId1={}," + + " encryptionId2={}", + this.owningClient.getName(), + channelData.getChannelContext().getTableName(), + totalBufferSizeInBytes + channelData.getBufferSize(), + channelData.getChannelContext().getEncryptionKeyId(), + channelsDataPerTable.get(idx - 1).getChannelContext().getEncryptionKeyId()); break; } + totalBufferSizeInBytes += channelData.getBufferSize(); idx++; } + // Add processed channels to the current blob, stop if we need to create a new blob blobData.add(channelsDataPerTable.subList(0, idx)); + if (idx != channelsDataPerTable.size()) { + break; + } } } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java index 90344b8a6..f70a3e0dc 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java @@ -57,8 +57,7 @@ public class FlushServiceTest { @Parameterized.Parameters(name = "{0}") public static Collection testContextFactory() { - return Arrays.asList( - new Object[][] {{ArrowTestContext.createFactory()}, {ParquetTestContext.createFactory()}}); + return Arrays.asList(new Object[][] {{ArrowTestContext.createFactory()}}); } public FlushServiceTest(TestContextFactory testContextFactory) { @@ -349,6 +348,8 @@ private SnowflakeStreamingIngestChannelInternal addChannel1(TestContext te .setOffsetToken("offset1") .setChannelSequencer(0L) .setRowSequencer(0L) + .setEncryptionKey("key") + .setEncryptionKeyId(1L) .buildAndAdd(); } @@ -361,6 +362,8 @@ private SnowflakeStreamingIngestChannelInternal addChannel2(TestContext te .setOffsetToken("offset2") .setChannelSequencer(10L) .setRowSequencer(100L) + .setEncryptionKey("key") + .setEncryptionKeyId(1L) .buildAndAdd(); } @@ -373,6 +376,22 @@ private SnowflakeStreamingIngestChannelInternal addChannel3(TestContext te .setOffsetToken("offset3") .setChannelSequencer(0L) .setRowSequencer(0L) + .setEncryptionKey("key3") + .setEncryptionKeyId(3L) + .buildAndAdd(); + } + + private SnowflakeStreamingIngestChannelInternal 
addChannel4(TestContext testContext) { + return testContext + .channelBuilder("channel4") + .setDBName("db1") + .setSchemaName("schema1") + .setTableName("table1") + .setOffsetToken("offset2") + .setChannelSequencer(10L) + .setRowSequencer(100L) + .setEncryptionKey("key4") + .setEncryptionKeyId(4L) .buildAndAdd(); } @@ -460,6 +479,38 @@ public void testFlush() throws Exception { Assert.assertTrue(flushService.lastFlushTime > 0); } + @Test + public void testBlobCreation() throws Exception { + TestContext testContext = testContextFactory.create(); + SnowflakeStreamingIngestChannelInternal channel1 = addChannel1(testContext); + SnowflakeStreamingIngestChannelInternal channel2 = addChannel2(testContext); + SnowflakeStreamingIngestChannelInternal channel4 = addChannel4(testContext); + + List schema = Arrays.asList(createTestIntegerColumn(), createTestTextColumn()); + channel1.getRowBuffer().setupSchema(schema); + channel2.getRowBuffer().setupSchema(schema); + channel4.getRowBuffer().setupSchema(schema); + + List> rows1 = + RowSetBuilder.newBuilder() + .addColumn("COLINT", 11) + .addColumn("COLCHAR", "bob") + .newRow() + .addColumn("COLINT", 22) + .addColumn("COLCHAR", "bob") + .build(); + + channel1.insertRows(rows1, "offset1"); + channel2.insertRows(rows1, "offset2"); + channel4.insertRows(rows1, "offset4"); + + FlushService flushService = testContext.flushService; + + // Force = true flushes + flushService.flush(true).get(); + Mockito.verify(flushService, Mockito.atLeast(2)).buildAndUpload(Mockito.any(), Mockito.any()); + } + @Test public void testBuildAndUpload() throws Exception { TestContext testContext = testContextFactory.create(); From 55fdb5902864403f5bb5607b200341b9eb925554 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 22 Feb 2023 15:33:54 -0800 Subject: [PATCH 05/29] update --- .../snowflake/ingest/streaming/internal/FlushServiceTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java index f70a3e0dc..0c485b94b 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java @@ -57,7 +57,8 @@ public class FlushServiceTest { @Parameterized.Parameters(name = "{0}") public static Collection testContextFactory() { - return Arrays.asList(new Object[][] {{ArrowTestContext.createFactory()}}); + return Arrays.asList( + new Object[][] {{ArrowTestContext.createFactory()}, {ParquetTestContext.createFactory()}}); } public FlushServiceTest(TestContextFactory testContextFactory) { From 94d2d054a38680684dbe0420ffb7e32e7c699145 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 5 Apr 2023 22:03:55 -0700 Subject: [PATCH 06/29] perf fixes --- pom.xml | 18 ++ .../SnowflakeStreamingIngestExample.java | 289 ++++++++++++++---- .../streaming/internal/ArrowRowBuffer.java | 16 +- .../internal/DataValidationUtil.java | 15 +- .../streaming/internal/LiteralQuoteUtils.java | 27 +- .../streaming/internal/ParquetRowBuffer.java | 15 +- 6 files changed, 273 insertions(+), 107 deletions(-) diff --git a/pom.xml b/pom.xml index 50acbf777..20a3d6ee0 100644 --- a/pom.xml +++ b/pom.xml @@ -280,6 +280,24 @@ jackson-databind + + + + com.github.ben-manes.caffeine + caffeine + 2.9.3 + + + com.google.errorprone + error_prone_annotations + + + org.checkerframework + checker-qual + + + + com.google.code.findbugs jsr305 diff --git 
a/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java b/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java index 2b12aba9f..8beed80c7 100644 --- a/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java +++ b/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java @@ -6,17 +6,20 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Properties; +import java.util.concurrent.TimeUnit; import net.snowflake.ingest.streaming.InsertValidationResponse; import net.snowflake.ingest.streaming.OpenChannelRequest; import net.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel; import net.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import net.snowflake.ingest.streaming.SnowflakeStreamingIngestClientFactory; +import org.apache.hadoop.util.StopWatch; /** * Example on how to use the Streaming Ingest client APIs. @@ -24,13 +27,65 @@ *
<p>
Please read the README.md file for detailed steps */ public class SnowflakeStreamingIngestExample { - // Please follow the example in profile_streaming.json.example to see the required properties, or - // if you have already set up profile.json with Snowpipe before, all you need is to add the "role" - // property. + private static String PROFILE_PATH = "profile.json"; private static final ObjectMapper mapper = new ObjectMapper(); + private enum ETabType { + VARCHAR, + INT, + NUM38, + DATE + } + + // Below are the seeting which we can control. + + /** Indicates how many columns are required for the table */ + private static final int nDataCols = 30; + // Col len must be at least 30 + /** Indicates column length */ + private static final int dataColLen = 100; + /** Indicates how many rows are needed */ + private static final int numRows = 2000000; + + /** Indicates the data type for each column */ + private static final ETabType tabType = ETabType.VARCHAR; + /** setting to true will drop the existing table */ + private boolean DROP_TABLES = true; + /** setting to true will create a new table */ + private boolean CREATE_TABLES = true; + /** setting to true will truncate a existing table */ + private boolean TRUNCATE_TABLES = false; + + /** setting to true will insert data into the table via snowpipe streaming */ + private boolean INSERT_TABLES = true; + + /** setting to true will use the quotes for the column during table creation and data insert */ + private static boolean ENABLE_QUOTES = false; + /** + * setting to true will use the ArrowBuffer. This flag is only needed when using SDK version + * >1.1.0 + */ + private static boolean USE_ARROW = false; + + // Connection properties + private static String USER_NAME = "NOUFALBA"; + private static String URL = "https://informatica.eu-central-1.snowflakecomputing.com:443"; + private static String PRIVATE_KEY_FILE_LOCATION = "C:\\snowflake\\key\\rsa_streaming_key.p8"; + private static String PORT = "443"; + private static String SCHEME = "https"; + private static String ROLE = "SYSADMIN"; + private static String DATA_BASE = "testdb_kafka"; + private static String SCHEMA = "kafka_test"; + private static String WARE_HOUSE = "DBMI_WH1"; + private String pad; + private String columnNamesArray[]; + public static void main(String[] args) throws Exception { + new SnowflakeStreamingIngestExample().doIt(); + } + + private static Properties getKeysPairAuthParams(boolean isStreamConnection) throws IOException { Properties props = new Properties(); Iterator> propIt = mapper.readTree(new String(Files.readAllBytes(Paths.get(PROFILE_PATH)))).fields(); @@ -38,62 +93,190 @@ public static void main(String[] args) throws Exception { Map.Entry prop = propIt.next(); props.put(prop.getKey(), prop.getValue().asText()); } + return props; + } + + public void doIt() throws Exception { + + if (dataColLen < 30) { + throw new IllegalArgumentException("Col len must be >=30"); + } + + if (dataColLen % 10 != 0) { + throw new IllegalArgumentException("Col len must be a multiple of 10"); + } + + final StringBuilder padBuilder = new StringBuilder(); + for (int i = 0; i < dataColLen; ++i) { + padBuilder.append("X"); + } + pad = padBuilder.toString(); + + // get all column names and cache it + columnNamesArray = new String[nDataCols]; + for (int i = 0; i < nDataCols; ++i) { + columnNamesArray[i] = getColName(i + 1); + } - // Create a streaming ingest client - try (SnowflakeStreamingIngestClient client = - 
SnowflakeStreamingIngestClientFactory.builder("MY_CLIENT").setProperties(props).build()) { - - // Create an open channel request on table MY_TABLE, note that the corresponding - // db/schema/table needs to be present - // Example: create or replace table MY_TABLE(c1 number); - OpenChannelRequest request1 = - OpenChannelRequest.builder("MY_CHANNEL") - .setDBName("MY_DATABASE") - .setSchemaName("MY_SCHEMA") - .setTableName("MY_TABLE") - .setOnErrorOption( - OpenChannelRequest.OnErrorOption.CONTINUE) // Another ON_ERROR option is ABORT - .build(); - - // Open a streaming ingest channel from the given client - SnowflakeStreamingIngestChannel channel1 = client.openChannel(request1); - - // Insert rows into the channel (Using insertRows API) - final int totalRowsInTable = 1000; - for (int val = 0; val < totalRowsInTable; val++) { - Map row = new HashMap<>(); - - // c1 corresponds to the column name in table - row.put("c1", val); - - // Insert the row with the current offset_token - InsertValidationResponse response = channel1.insertRow(row, String.valueOf(val)); - if (response.hasErrors()) { - // Simply throw if there is an exception, or you can do whatever you want with the - // erroneous row - throw response.getInsertErrors().get(0).getException(); + if (INSERT_TABLES) { + new Inserter().doInserts(); + } + + System.out.println("Done"); + } + + private String getColDef() { + String colDef; + switch (tabType) { + case VARCHAR: + colDef = String.format("varchar(%s)", dataColLen); + break; + case NUM38: + colDef = String.format("NUMBER(%s)", 38); + break; + case INT: + colDef = "INTEGER"; + break; + case DATE: + colDef = "DATE"; + break; + default: + throw new RuntimeException("Unsupported : " + tabType); + } + + return colDef; + } + + private String getFullyQualifiedTableName() { + return String.format("%s.%s", SCHEMA, getTabName()); + } + + private String getTabName() { + int tabNum = 1; + String tabName; + + switch (tabType) { + case VARCHAR: + tabName = String.format("tabL%06d", tabNum); + break; + default: + throw new RuntimeException("Unsupported : " + tabType); + } + return tabName; + } + + private String getColName(int colNum) { + if (ENABLE_QUOTES) { + return wrap(String.format("Col_%04d", colNum)); + } else { + return String.format("Col_%04d", colNum); + } + } + + public static String wrap(String identifier) { + final String quote = "\""; + return new StringBuilder(quote).append(identifier).append(quote).toString(); + } + + /////////////////////////////// + private class Inserter { + + public Inserter() {} + + public void doInserts() throws Exception { + try (SnowflakeStreamingIngestClient client = + SnowflakeStreamingIngestClientFactory.builder("INFA_CLIENT") + .setProperties(getKeysPairAuthParams(true)) + .build()) { + // Open a streaming ingest channel from the given client + OpenChannelRequest request1 = + OpenChannelRequest.builder("MSSQL_TEST_RS_84") + .setDBName(DATA_BASE) + .setSchemaName(SCHEMA) + .setTableName("t_streamingingest") + .setOnErrorOption( + OpenChannelRequest.OnErrorOption.CONTINUE) // Another ON_ERROR option is ABORT + .build(); + + // Open a streaming ingest channel from the given client + SnowflakeStreamingIngestChannel channel1 = client.openChannel(request1); + + String previousOffsetTokenFromSnowflake = channel1.getLatestCommittedOffsetToken(); + + System.out.println( + "=============================================================================="); + System.out.println( + "******************************** STARTING OFFSET IS " + + 
previousOffsetTokenFromSnowflake); + System.out.println( + "======================================f========================================"); + + // Insert rows into the channel (Using insertRows API) + StopWatch watch = new StopWatch(); + watch.start(); + + for (int val = 0; val < numRows; val++) { + Map row = new HashMap<>(); + for (int bc = 0; bc < nDataCols; ++bc) { + + row.put(columnNamesArray[bc], buildDataCol()); + } + InsertValidationResponse response = channel1.insertRow(row, String.valueOf(val + 1)); + if (response.hasErrors()) { + // Simply throw if there is an exception, or you can do whatever you want with the + // erroneous row + throw response.getInsertErrors().get(0).getException(); + } } + + System.out.println("aaaaaaaaa Elapsed Time in Seconds: " + watch.now(TimeUnit.SECONDS)); + + // If needed, you can check the offset_token registered in Snowflake to make sure everything + // is committed + final String expectedOffsetTokenInSnowflake = String.valueOf(numRows); + final int maxRetries = 60; + int retryCount = 0; + + do { + String offsetTokenFromSnowflake = channel1.getLatestCommittedOffsetToken(); + System.out.println( + "=============================================================================="); + System.out.println( + "+++++++++++++++++++++++++++++++++++++++++ CURRENT OFFSET IS " + + offsetTokenFromSnowflake); + System.out.println( + "=============================================================================="); + if (offsetTokenFromSnowflake != null + && offsetTokenFromSnowflake.equals(String.valueOf(expectedOffsetTokenInSnowflake))) { + System.out.println( + "=============================================================================="); + System.out.println( + "+++++++++++++++++++++++++++++++++++++++++ SUCCESSFULLY inserted " + + numRows + + " rows"); + System.out.println( + "=============================================================================="); + break; + } + + retryCount++; + } while (true); + watch.stop(); + System.out.println("aaaaaaaaa Elapsed Time in Seconds: " + watch.now(TimeUnit.SECONDS)); } + } - // If needed, you can check the offset_token registered in Snowflake to make sure everything - // is committed - final int expectedOffsetTokenInSnowflake = totalRowsInTable - 1; // 0 based offset_token - final int maxRetries = 10; - int retryCount = 0; - - do { - String offsetTokenFromSnowflake = channel1.getLatestCommittedOffsetToken(); - if (offsetTokenFromSnowflake != null - && offsetTokenFromSnowflake.equals(String.valueOf(expectedOffsetTokenInSnowflake))) { - System.out.println("SUCCESSFULLY inserted " + totalRowsInTable + " rows"); + private Object buildDataCol() { + Object dataVal; + switch (tabType) { + case VARCHAR: + dataVal = pad; break; - } - retryCount++; - } while (retryCount < maxRetries); + default: + throw new RuntimeException("Unsupported : " + tabType); + } - // Close the channel, the function internally will make sure everything is committed (or throw - // an exception if there is any issue) - channel1.close().get(); + return dataVal; } } } diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ArrowRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/ArrowRowBuffer.java index c253014c0..31c0ef53e 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ArrowRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ArrowRowBuffer.java @@ -428,14 +428,9 @@ private float convertRowToArrow( // Create new empty stats just for the current row. 
Map forkedStatsMap = new HashMap<>(); - // We need to iterate twice over the row and over unquoted names, we store the value to avoid - // re-computation - Map userInputToUnquotedColumnNameMap = new HashMap<>(); - for (Map.Entry entry : row.entrySet()) { rowBufferSize += 0.125; // 1/8 for null value bitmap String columnName = LiteralQuoteUtils.unquoteColumnName(entry.getKey()); - userInputToUnquotedColumnNameMap.put(entry.getKey(), columnName); Object value = entry.getValue(); Field field = this.fields.get(columnName); Utils.assertNotNull("Arrow column field", field); @@ -737,12 +732,13 @@ private float convertRowToArrow( // All input values passed validation, iterate over the columns again and combine their existing // statistics with the forked statistics for the current row. - for (String userInputColumnName : row.keySet()) { - String columnName = userInputToUnquotedColumnNameMap.get(userInputColumnName); - RowBufferStats stats = statsMap.get(columnName); - RowBufferStats forkedStats = forkedStatsMap.get(columnName); - statsMap.put(columnName, RowBufferStats.getCombinedStats(stats, forkedStats)); + for (Map.Entry forkedColStats : forkedStatsMap.entrySet()) { + String columnName = forkedColStats.getKey(); + statsMap.put( + columnName, + RowBufferStats.getCombinedStats(statsMap.get(columnName), forkedColStats.getValue())); } + // Insert nulls to the columns that doesn't show up in the input for (String columnName : Sets.difference(this.fields.keySet(), inputColumnNames)) { rowBufferSize += 0.125; // 1/8 for null value bitmap diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index d9fd77273..3fac37429 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -13,10 +13,6 @@ import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; import java.math.BigDecimal; import java.math.BigInteger; -import java.nio.CharBuffer; -import java.nio.charset.CharacterCodingException; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDate; @@ -846,14 +842,9 @@ private static String sanitizeValueForExceptionMessage(Object value) { * UTF-16 surrogate, for example. 
*/ private static void verifyValidUtf8(String input, String columnName, String dataType) { - CharsetEncoder charsetEncoder = - StandardCharsets.UTF_8 - .newEncoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - try { - charsetEncoder.encode(CharBuffer.wrap(input)); - } catch (CharacterCodingException e) { + String roundTripStr = + new String(input.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); + if (!input.equals(roundTripStr)) { throw valueFormatNotAllowedException(columnName, input, dataType, "Invalid Unicode string"); } } diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java index 74d97cfa0..12fe5cd3a 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java @@ -3,12 +3,8 @@ */ package net.snowflake.ingest.streaming.internal; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import java.util.concurrent.ExecutionException; -import net.snowflake.ingest.utils.ErrorCode; -import net.snowflake.ingest.utils.SFException; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.LoadingCache; /** * Util class to normalise literals to match server side metadata. @@ -26,15 +22,9 @@ class LiteralQuoteUtils { static { unquotedColumnNamesCache = - CacheBuilder.newBuilder() + Caffeine.newBuilder() .maximumSize(UNQUOTED_COLUMN_NAME_CACHE_MAX_SIZE) - .build( - new CacheLoader() { - @Override - public String load(String key) { - return unquoteColumnNameInternal(key); - } - }); + .build(LiteralQuoteUtils::unquoteColumnNameInternal); } /** @@ -42,14 +32,7 @@ public String load(String key) { * expensive. If not, it unquotes directly, otherwise it return a value from a loading cache. */ static String unquoteColumnName(String columnName) { - try { - return unquotedColumnNamesCache.get(columnName); - } catch (ExecutionException e) { - throw new SFException( - e, - ErrorCode.INTERNAL_ERROR, - String.format("Exception thrown while unquoting column name %s", columnName)); - } + return unquotedColumnNamesCache.get(columnName); } /** diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java index bc53a61a7..9a37831e8 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java @@ -183,15 +183,10 @@ private float addRow( // Create new empty stats just for the current row. 
Map forkedStatsMap = new HashMap<>(); - // We need to iterate twice over the row and over unquoted names, we store the value to avoid - // re-computation - Map userInputToUnquotedColumnNameMap = new HashMap<>(); - for (Map.Entry entry : row.entrySet()) { String key = entry.getKey(); Object value = entry.getValue(); String columnName = LiteralQuoteUtils.unquoteColumnName(key); - userInputToUnquotedColumnNameMap.put(key, columnName); int colIndex = fieldIndex.get(columnName).getSecond(); RowBufferStats forkedStats = statsMap.get(columnName).forkEmpty(); forkedStatsMap.put(columnName, forkedStats); @@ -209,11 +204,11 @@ private float addRow( // All input values passed validation, iterate over the columns again and combine their existing // statistics with the forked statistics for the current row. - for (String userInputColumnName : row.keySet()) { - String columnName = userInputToUnquotedColumnNameMap.get(userInputColumnName); - RowBufferStats stats = statsMap.get(columnName); - RowBufferStats forkedStats = forkedStatsMap.get(columnName); - statsMap.put(columnName, RowBufferStats.getCombinedStats(stats, forkedStats)); + for (Map.Entry forkedColStats : forkedStatsMap.entrySet()) { + String columnName = forkedColStats.getKey(); + statsMap.put( + columnName, + RowBufferStats.getCombinedStats(statsMap.get(columnName), forkedColStats.getValue())); } // Increment null count for column missing in the input map From e8da46b6948a94ae67b6bb6be6533642432a96b2 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 5 Apr 2023 22:07:25 -0700 Subject: [PATCH 07/29] update example --- .../SnowflakeStreamingIngestExample.java | 289 ++++-------------- 1 file changed, 53 insertions(+), 236 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java b/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java index 8beed80c7..2b12aba9f 100644 --- a/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java +++ b/src/main/java/net/snowflake/ingest/streaming/example/SnowflakeStreamingIngestExample.java @@ -6,20 +6,17 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Properties; -import java.util.concurrent.TimeUnit; import net.snowflake.ingest.streaming.InsertValidationResponse; import net.snowflake.ingest.streaming.OpenChannelRequest; import net.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel; import net.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import net.snowflake.ingest.streaming.SnowflakeStreamingIngestClientFactory; -import org.apache.hadoop.util.StopWatch; /** * Example on how to use the Streaming Ingest client APIs. @@ -27,65 +24,13 @@ *
<p>
Please read the README.md file for detailed steps */ public class SnowflakeStreamingIngestExample { - + // Please follow the example in profile_streaming.json.example to see the required properties, or + // if you have already set up profile.json with Snowpipe before, all you need is to add the "role" + // property. private static String PROFILE_PATH = "profile.json"; private static final ObjectMapper mapper = new ObjectMapper(); - private enum ETabType { - VARCHAR, - INT, - NUM38, - DATE - } - - // Below are the seeting which we can control. - - /** Indicates how many columns are required for the table */ - private static final int nDataCols = 30; - // Col len must be at least 30 - /** Indicates column length */ - private static final int dataColLen = 100; - /** Indicates how many rows are needed */ - private static final int numRows = 2000000; - - /** Indicates the data type for each column */ - private static final ETabType tabType = ETabType.VARCHAR; - /** setting to true will drop the existing table */ - private boolean DROP_TABLES = true; - /** setting to true will create a new table */ - private boolean CREATE_TABLES = true; - /** setting to true will truncate a existing table */ - private boolean TRUNCATE_TABLES = false; - - /** setting to true will insert data into the table via snowpipe streaming */ - private boolean INSERT_TABLES = true; - - /** setting to true will use the quotes for the column during table creation and data insert */ - private static boolean ENABLE_QUOTES = false; - /** - * setting to true will use the ArrowBuffer. This flag is only needed when using SDK version - * >1.1.0 - */ - private static boolean USE_ARROW = false; - - // Connection properties - private static String USER_NAME = "NOUFALBA"; - private static String URL = "https://informatica.eu-central-1.snowflakecomputing.com:443"; - private static String PRIVATE_KEY_FILE_LOCATION = "C:\\snowflake\\key\\rsa_streaming_key.p8"; - private static String PORT = "443"; - private static String SCHEME = "https"; - private static String ROLE = "SYSADMIN"; - private static String DATA_BASE = "testdb_kafka"; - private static String SCHEMA = "kafka_test"; - private static String WARE_HOUSE = "DBMI_WH1"; - private String pad; - private String columnNamesArray[]; - public static void main(String[] args) throws Exception { - new SnowflakeStreamingIngestExample().doIt(); - } - - private static Properties getKeysPairAuthParams(boolean isStreamConnection) throws IOException { Properties props = new Properties(); Iterator> propIt = mapper.readTree(new String(Files.readAllBytes(Paths.get(PROFILE_PATH)))).fields(); @@ -93,190 +38,62 @@ private static Properties getKeysPairAuthParams(boolean isStreamConnection) thro Map.Entry prop = propIt.next(); props.put(prop.getKey(), prop.getValue().asText()); } - return props; - } - - public void doIt() throws Exception { - - if (dataColLen < 30) { - throw new IllegalArgumentException("Col len must be >=30"); - } - - if (dataColLen % 10 != 0) { - throw new IllegalArgumentException("Col len must be a multiple of 10"); - } - - final StringBuilder padBuilder = new StringBuilder(); - for (int i = 0; i < dataColLen; ++i) { - padBuilder.append("X"); - } - pad = padBuilder.toString(); - - // get all column names and cache it - columnNamesArray = new String[nDataCols]; - for (int i = 0; i < nDataCols; ++i) { - columnNamesArray[i] = getColName(i + 1); - } - if (INSERT_TABLES) { - new Inserter().doInserts(); - } - - System.out.println("Done"); - } - - private String getColDef() { - String colDef; 
- switch (tabType) { - case VARCHAR: - colDef = String.format("varchar(%s)", dataColLen); - break; - case NUM38: - colDef = String.format("NUMBER(%s)", 38); - break; - case INT: - colDef = "INTEGER"; - break; - case DATE: - colDef = "DATE"; - break; - default: - throw new RuntimeException("Unsupported : " + tabType); - } - - return colDef; - } - - private String getFullyQualifiedTableName() { - return String.format("%s.%s", SCHEMA, getTabName()); - } - - private String getTabName() { - int tabNum = 1; - String tabName; - - switch (tabType) { - case VARCHAR: - tabName = String.format("tabL%06d", tabNum); - break; - default: - throw new RuntimeException("Unsupported : " + tabType); - } - return tabName; - } - - private String getColName(int colNum) { - if (ENABLE_QUOTES) { - return wrap(String.format("Col_%04d", colNum)); - } else { - return String.format("Col_%04d", colNum); - } - } - - public static String wrap(String identifier) { - final String quote = "\""; - return new StringBuilder(quote).append(identifier).append(quote).toString(); - } - - /////////////////////////////// - private class Inserter { - - public Inserter() {} - - public void doInserts() throws Exception { - try (SnowflakeStreamingIngestClient client = - SnowflakeStreamingIngestClientFactory.builder("INFA_CLIENT") - .setProperties(getKeysPairAuthParams(true)) - .build()) { - // Open a streaming ingest channel from the given client - OpenChannelRequest request1 = - OpenChannelRequest.builder("MSSQL_TEST_RS_84") - .setDBName(DATA_BASE) - .setSchemaName(SCHEMA) - .setTableName("t_streamingingest") - .setOnErrorOption( - OpenChannelRequest.OnErrorOption.CONTINUE) // Another ON_ERROR option is ABORT - .build(); - - // Open a streaming ingest channel from the given client - SnowflakeStreamingIngestChannel channel1 = client.openChannel(request1); - - String previousOffsetTokenFromSnowflake = channel1.getLatestCommittedOffsetToken(); - - System.out.println( - "=============================================================================="); - System.out.println( - "******************************** STARTING OFFSET IS " - + previousOffsetTokenFromSnowflake); - System.out.println( - "======================================f========================================"); - - // Insert rows into the channel (Using insertRows API) - StopWatch watch = new StopWatch(); - watch.start(); - - for (int val = 0; val < numRows; val++) { - Map row = new HashMap<>(); - for (int bc = 0; bc < nDataCols; ++bc) { - - row.put(columnNamesArray[bc], buildDataCol()); - } - InsertValidationResponse response = channel1.insertRow(row, String.valueOf(val + 1)); - if (response.hasErrors()) { - // Simply throw if there is an exception, or you can do whatever you want with the - // erroneous row - throw response.getInsertErrors().get(0).getException(); - } + // Create a streaming ingest client + try (SnowflakeStreamingIngestClient client = + SnowflakeStreamingIngestClientFactory.builder("MY_CLIENT").setProperties(props).build()) { + + // Create an open channel request on table MY_TABLE, note that the corresponding + // db/schema/table needs to be present + // Example: create or replace table MY_TABLE(c1 number); + OpenChannelRequest request1 = + OpenChannelRequest.builder("MY_CHANNEL") + .setDBName("MY_DATABASE") + .setSchemaName("MY_SCHEMA") + .setTableName("MY_TABLE") + .setOnErrorOption( + OpenChannelRequest.OnErrorOption.CONTINUE) // Another ON_ERROR option is ABORT + .build(); + + // Open a streaming ingest channel from the given client + 
SnowflakeStreamingIngestChannel channel1 = client.openChannel(request1); + + // Insert rows into the channel (Using insertRows API) + final int totalRowsInTable = 1000; + for (int val = 0; val < totalRowsInTable; val++) { + Map row = new HashMap<>(); + + // c1 corresponds to the column name in table + row.put("c1", val); + + // Insert the row with the current offset_token + InsertValidationResponse response = channel1.insertRow(row, String.valueOf(val)); + if (response.hasErrors()) { + // Simply throw if there is an exception, or you can do whatever you want with the + // erroneous row + throw response.getInsertErrors().get(0).getException(); } - - System.out.println("aaaaaaaaa Elapsed Time in Seconds: " + watch.now(TimeUnit.SECONDS)); - - // If needed, you can check the offset_token registered in Snowflake to make sure everything - // is committed - final String expectedOffsetTokenInSnowflake = String.valueOf(numRows); - final int maxRetries = 60; - int retryCount = 0; - - do { - String offsetTokenFromSnowflake = channel1.getLatestCommittedOffsetToken(); - System.out.println( - "=============================================================================="); - System.out.println( - "+++++++++++++++++++++++++++++++++++++++++ CURRENT OFFSET IS " - + offsetTokenFromSnowflake); - System.out.println( - "=============================================================================="); - if (offsetTokenFromSnowflake != null - && offsetTokenFromSnowflake.equals(String.valueOf(expectedOffsetTokenInSnowflake))) { - System.out.println( - "=============================================================================="); - System.out.println( - "+++++++++++++++++++++++++++++++++++++++++ SUCCESSFULLY inserted " - + numRows - + " rows"); - System.out.println( - "=============================================================================="); - break; - } - - retryCount++; - } while (true); - watch.stop(); - System.out.println("aaaaaaaaa Elapsed Time in Seconds: " + watch.now(TimeUnit.SECONDS)); } - } - private Object buildDataCol() { - Object dataVal; - switch (tabType) { - case VARCHAR: - dataVal = pad; + // If needed, you can check the offset_token registered in Snowflake to make sure everything + // is committed + final int expectedOffsetTokenInSnowflake = totalRowsInTable - 1; // 0 based offset_token + final int maxRetries = 10; + int retryCount = 0; + + do { + String offsetTokenFromSnowflake = channel1.getLatestCommittedOffsetToken(); + if (offsetTokenFromSnowflake != null + && offsetTokenFromSnowflake.equals(String.valueOf(expectedOffsetTokenInSnowflake))) { + System.out.println("SUCCESSFULLY inserted " + totalRowsInTable + " rows"); break; - default: - throw new RuntimeException("Unsupported : " + tabType); - } + } + retryCount++; + } while (retryCount < maxRetries); - return dataVal; + // Close the channel, the function internally will make sure everything is committed (or throw + // an exception if there is any issue) + channel1.close().get(); } } } From fdedb236d185c428cb209352e7017e644f1e3631 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Fri, 7 Apr 2023 13:13:16 -0700 Subject: [PATCH 08/29] remove utf8 --- pom.xml | 18 ------------- .../internal/DataValidationUtil.java | 14 ---------- .../streaming/internal/LiteralQuoteUtils.java | 27 +++++++++++++++---- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/pom.xml b/pom.xml index 20a3d6ee0..50acbf777 100644 --- a/pom.xml +++ b/pom.xml @@ -280,24 +280,6 @@ jackson-databind - - - - com.github.ben-manes.caffeine - caffeine - 2.9.3 
- - - com.google.errorprone - error_prone_annotations - - - org.checkerframework - checker-qual - - - - com.google.code.findbugs jsr305 diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index 3fac37429..239fad56d 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -97,7 +97,6 @@ private static JsonNode validateAndParseSemiStructuredAsJsonTree( String columnName, Object input, String snowflakeType) { if (input instanceof String) { String stringInput = (String) input; - verifyValidUtf8(stringInput, columnName, snowflakeType); try { return objectMapper.readTree(stringInput); } catch (JsonProcessingException e) { @@ -461,7 +460,6 @@ static String validateAndParseString( String output; if (input instanceof String) { output = (String) input; - verifyValidUtf8(output, columnName, "STRING"); } else if (input instanceof Number) { output = new BigDecimal(input.toString()).stripTrailingZeros().toPlainString(); } else if (input instanceof Boolean || input instanceof Character) { @@ -836,16 +834,4 @@ private static String sanitizeValueForExceptionMessage(Object value) { String valueString = value.toString(); return valueString.length() <= maxSize ? valueString : valueString.substring(0, 20) + "..."; } - - /** - * Validates that a string is valid UTF-8 string. It catches situations like unmatched high/low - * UTF-16 surrogate, for example. - */ - private static void verifyValidUtf8(String input, String columnName, String dataType) { - String roundTripStr = - new String(input.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); - if (!input.equals(roundTripStr)) { - throw valueFormatNotAllowedException(columnName, input, dataType, "Invalid Unicode string"); - } - } } diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java index 12fe5cd3a..74d97cfa0 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java @@ -3,8 +3,12 @@ */ package net.snowflake.ingest.streaming.internal; -import com.github.benmanes.caffeine.cache.Caffeine; -import com.github.benmanes.caffeine.cache.LoadingCache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import java.util.concurrent.ExecutionException; +import net.snowflake.ingest.utils.ErrorCode; +import net.snowflake.ingest.utils.SFException; /** * Util class to normalise literals to match server side metadata. @@ -22,9 +26,15 @@ class LiteralQuoteUtils { static { unquotedColumnNamesCache = - Caffeine.newBuilder() + CacheBuilder.newBuilder() .maximumSize(UNQUOTED_COLUMN_NAME_CACHE_MAX_SIZE) - .build(LiteralQuoteUtils::unquoteColumnNameInternal); + .build( + new CacheLoader() { + @Override + public String load(String key) { + return unquoteColumnNameInternal(key); + } + }); } /** @@ -32,7 +42,14 @@ class LiteralQuoteUtils { * expensive. If not, it unquotes directly, otherwise it return a value from a loading cache. 
*/ static String unquoteColumnName(String columnName) { - return unquotedColumnNamesCache.get(columnName); + try { + return unquotedColumnNamesCache.get(columnName); + } catch (ExecutionException e) { + throw new SFException( + e, + ErrorCode.INTERNAL_ERROR, + String.format("Exception thrown while unquoting column name %s", columnName)); + } } /** From 1190275dabc78d23f0f489819b9186b475fa3548 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Fri, 7 Apr 2023 13:50:14 -0700 Subject: [PATCH 09/29] Revert "remove utf8" This reverts commit fdedb236d185c428cb209352e7017e644f1e3631. --- pom.xml | 18 +++++++++++++ .../internal/DataValidationUtil.java | 14 ++++++++++ .../streaming/internal/LiteralQuoteUtils.java | 27 ++++--------------- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/pom.xml b/pom.xml index 50acbf777..20a3d6ee0 100644 --- a/pom.xml +++ b/pom.xml @@ -280,6 +280,24 @@ jackson-databind + + + + com.github.ben-manes.caffeine + caffeine + 2.9.3 + + + com.google.errorprone + error_prone_annotations + + + org.checkerframework + checker-qual + + + + com.google.code.findbugs jsr305 diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index 239fad56d..3fac37429 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -97,6 +97,7 @@ private static JsonNode validateAndParseSemiStructuredAsJsonTree( String columnName, Object input, String snowflakeType) { if (input instanceof String) { String stringInput = (String) input; + verifyValidUtf8(stringInput, columnName, snowflakeType); try { return objectMapper.readTree(stringInput); } catch (JsonProcessingException e) { @@ -460,6 +461,7 @@ static String validateAndParseString( String output; if (input instanceof String) { output = (String) input; + verifyValidUtf8(output, columnName, "STRING"); } else if (input instanceof Number) { output = new BigDecimal(input.toString()).stripTrailingZeros().toPlainString(); } else if (input instanceof Boolean || input instanceof Character) { @@ -834,4 +836,16 @@ private static String sanitizeValueForExceptionMessage(Object value) { String valueString = value.toString(); return valueString.length() <= maxSize ? valueString : valueString.substring(0, 20) + "..."; } + + /** + * Validates that a string is valid UTF-8 string. It catches situations like unmatched high/low + * UTF-16 surrogate, for example. 
+ */ + private static void verifyValidUtf8(String input, String columnName, String dataType) { + String roundTripStr = + new String(input.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); + if (!input.equals(roundTripStr)) { + throw valueFormatNotAllowedException(columnName, input, dataType, "Invalid Unicode string"); + } + } } diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java index 74d97cfa0..12fe5cd3a 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java @@ -3,12 +3,8 @@ */ package net.snowflake.ingest.streaming.internal; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import java.util.concurrent.ExecutionException; -import net.snowflake.ingest.utils.ErrorCode; -import net.snowflake.ingest.utils.SFException; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.LoadingCache; /** * Util class to normalise literals to match server side metadata. @@ -26,15 +22,9 @@ class LiteralQuoteUtils { static { unquotedColumnNamesCache = - CacheBuilder.newBuilder() + Caffeine.newBuilder() .maximumSize(UNQUOTED_COLUMN_NAME_CACHE_MAX_SIZE) - .build( - new CacheLoader() { - @Override - public String load(String key) { - return unquoteColumnNameInternal(key); - } - }); + .build(LiteralQuoteUtils::unquoteColumnNameInternal); } /** @@ -42,14 +32,7 @@ public String load(String key) { * expensive. If not, it unquotes directly, otherwise it return a value from a loading cache. */ static String unquoteColumnName(String columnName) { - try { - return unquotedColumnNamesCache.get(columnName); - } catch (ExecutionException e) { - throw new SFException( - e, - ErrorCode.INTERNAL_ERROR, - String.format("Exception thrown while unquoting column name %s", columnName)); - } + return unquotedColumnNamesCache.get(columnName); } /** From 96db6395d8fed621da2972348283dd813c8d29ed Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Fri, 7 Apr 2023 14:01:26 -0700 Subject: [PATCH 10/29] fix --- pom.xml | 18 ------------- .../streaming/internal/LiteralQuoteUtils.java | 27 +++++++++++++++---- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/pom.xml b/pom.xml index 20a3d6ee0..50acbf777 100644 --- a/pom.xml +++ b/pom.xml @@ -280,24 +280,6 @@ jackson-databind - - - - com.github.ben-manes.caffeine - caffeine - 2.9.3 - - - com.google.errorprone - error_prone_annotations - - - org.checkerframework - checker-qual - - - - com.google.code.findbugs jsr305 diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java index 12fe5cd3a..74d97cfa0 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java @@ -3,8 +3,12 @@ */ package net.snowflake.ingest.streaming.internal; -import com.github.benmanes.caffeine.cache.Caffeine; -import com.github.benmanes.caffeine.cache.LoadingCache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import java.util.concurrent.ExecutionException; +import net.snowflake.ingest.utils.ErrorCode; +import 
net.snowflake.ingest.utils.SFException; /** * Util class to normalise literals to match server side metadata. @@ -22,9 +26,15 @@ class LiteralQuoteUtils { static { unquotedColumnNamesCache = - Caffeine.newBuilder() + CacheBuilder.newBuilder() .maximumSize(UNQUOTED_COLUMN_NAME_CACHE_MAX_SIZE) - .build(LiteralQuoteUtils::unquoteColumnNameInternal); + .build( + new CacheLoader() { + @Override + public String load(String key) { + return unquoteColumnNameInternal(key); + } + }); } /** @@ -32,7 +42,14 @@ class LiteralQuoteUtils { * expensive. If not, it unquotes directly, otherwise it return a value from a loading cache. */ static String unquoteColumnName(String columnName) { - return unquotedColumnNamesCache.get(columnName); + try { + return unquotedColumnNamesCache.get(columnName); + } catch (ExecutionException e) { + throw new SFException( + e, + ErrorCode.INTERNAL_ERROR, + String.format("Exception thrown while unquoting column name %s", columnName)); + } } /** From e4ef9220111cd21b67ceef1536a43b8c24021cb8 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Fri, 7 Apr 2023 17:01:41 -0700 Subject: [PATCH 11/29] fix tests --- .../ingest/streaming/internal/datatypes/StringsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java index f41e24dc1..0163ae9eb 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java @@ -217,7 +217,7 @@ public void testCollatedColumnsNotSupported() throws SQLException { openChannel(tableName); Assert.fail("Opening a channel shouldn't have succeeded"); } catch (SFException e) { - Assert.assertEquals(ErrorCode.UNSUPPORTED_DATA_TYPE.getMessageCode(), e.getVendorCode()); + Assert.assertEquals(ErrorCode.INVALID_ROW.getMessageCode(), e.getVendorCode()); } } } From 2160a360a81e120bacf04c725dd068b57168b4b4 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Fri, 7 Apr 2023 18:26:54 -0700 Subject: [PATCH 12/29] fix tests --- .../ingest/streaming/internal/datatypes/StringsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java index 0163ae9eb..cfa59e0b1 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/StringsIT.java @@ -217,7 +217,7 @@ public void testCollatedColumnsNotSupported() throws SQLException { openChannel(tableName); Assert.fail("Opening a channel shouldn't have succeeded"); } catch (SFException e) { - Assert.assertEquals(ErrorCode.INVALID_ROW.getMessageCode(), e.getVendorCode()); + Assert.assertEquals(ErrorCode.OPEN_CHANNEL_FAILURE.getMessageCode(), e.getVendorCode()); } } } From 4146bfbec62c240737f18a61e78fd94ba9369697 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Tue, 11 Apr 2023 15:42:03 -0700 Subject: [PATCH 13/29] use dynamic scaling thread pool --- .../ingest/streaming/internal/FlushService.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 7758e30e6..150d020b6 100644 --- 
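/*
 * Illustrative sketch (not part of the patch): the Guava LoadingCache pattern this series
 * settles on for column-name unquoting, shown standalone. The cache size and the normalize()
 * helper are placeholders, not SDK code; the Caffeine variant removed here differs mainly in
 * the builder API and in get() not throwing a checked ExecutionException.
 */
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.util.concurrent.ExecutionException;

class ColumnNameCacheSketch {
  private static final LoadingCache<String, String> CACHE =
      CacheBuilder.newBuilder()
          .maximumSize(1000) // bound memory; entries are evicted once the cap is exceeded
          .build(
              new CacheLoader<String, String>() {
                @Override
                public String load(String key) {
                  return normalize(key); // computed once per distinct key, then served from cache
                }
              });

  static String lookup(String columnName) {
    try {
      return CACHE.get(columnName);
    } catch (ExecutionException e) {
      // Guava wraps loader failures in a checked exception; rethrow as unchecked here.
      throw new IllegalStateException("Failed to normalize " + columnName, e);
    }
  }

  // Placeholder for the real unquoting logic.
  private static String normalize(String name) {
    return name.startsWith("\"") && name.endsWith("\"")
        ? name.substring(1, name.length() - 1)
        : name.toUpperCase();
  }
}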
a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -31,6 +31,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -319,7 +320,13 @@ private void createWorkers() { * (1 + this.owningClient.getParameterProvider().getIOTimeCpuRatio()), MAX_THREAD_COUNT); this.buildUploadWorkers = - Executors.newFixedThreadPool(buildUploadThreadCount, buildUploadThreadFactory); + new ThreadPoolExecutor( + 1, + buildUploadThreadCount, + 60L, + TimeUnit.SECONDS, + new SynchronousQueue(), + buildUploadThreadFactory); logger.logInfo( "Create {} threads for build/upload blobs for client={}, total available processors={}", From 0344b70d891c4a4befd959d5f9631420480e3a03 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Tue, 11 Apr 2023 16:05:21 -0700 Subject: [PATCH 14/29] file format --- .../ingest/streaming/internal/DataValidationUtilTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index f0f071c38..6f3dd4558 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -990,8 +990,8 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index: 0," - + " reason: Not a valid boolean, see" + + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index: 0, reason:" + + " Not a valid boolean, see" + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean" + " for the list of supported formats", () -> validateAndParseBoolean("COL", "abc", 0)); From 65e16f4cc45818ccbadab1a7e9c65b1799c56b0b Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Tue, 16 May 2023 21:27:10 -0700 Subject: [PATCH 15/29] fix --- .../ingest/streaming/internal/FlushService.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 150d020b6..7758e30e6 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -31,7 +31,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -320,13 +319,7 @@ private void createWorkers() { * (1 + this.owningClient.getParameterProvider().getIOTimeCpuRatio()), MAX_THREAD_COUNT); this.buildUploadWorkers = - new ThreadPoolExecutor( - 1, - buildUploadThreadCount, - 60L, - TimeUnit.SECONDS, - new SynchronousQueue(), - buildUploadThreadFactory); + 
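/*
 * Illustrative sketch (not part of the patch): the two build/upload worker-pool choices this
 * series switches between. Thread counts and the default factory are placeholders; the
 * behavioral comments describe standard java.util.concurrent semantics.
 */
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

class WorkerPoolSketch {
  static ExecutorService fixedPool(int threads) {
    // Creates up to 'threads' workers as tasks arrive and keeps them alive; additional
    // tasks wait in an unbounded queue.
    return Executors.newFixedThreadPool(threads);
  }

  static ExecutorService scalingPool(int maxThreads) {
    // Starts with one core thread and hands tasks directly to workers (no queueing).
    // The pool grows on demand up to maxThreads, idle extra threads die after 60 seconds,
    // and submissions are rejected once every worker is busy.
    return new ThreadPoolExecutor(
        1,
        maxThreads,
        60L,
        TimeUnit.SECONDS,
        new SynchronousQueue<>(),
        Executors.defaultThreadFactory());
  }
}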
Executors.newFixedThreadPool(buildUploadThreadCount, buildUploadThreadFactory); logger.logInfo( "Create {} threads for build/upload blobs for client={}, total available processors={}", From 025958c49ffb3407942da24c9ff3a7ec2e45c111 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Tue, 16 May 2023 21:57:58 -0700 Subject: [PATCH 16/29] update max chunk size --- .../streaming/internal/AbstractRowBuffer.java | 6 ++++-- .../streaming/internal/ParquetFlusher.java | 8 +++++-- .../streaming/internal/ParquetRowBuffer.java | 21 ++++++++++++------- ...owflakeStreamingIngestChannelInternal.java | 9 +++++--- .../net/snowflake/ingest/utils/Constants.java | 1 - .../ingest/utils/ParameterProvider.java | 13 +++++++++++- .../parquet/hadoop/BdecParquetWriter.java | 13 ++++++------ .../internal/ParameterProviderTest.java | 5 +++++ .../streaming/internal/RowBufferTest.java | 4 +++- 9 files changed, 56 insertions(+), 24 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java index 9156f9c4b..0580006d0 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java @@ -559,7 +559,8 @@ static AbstractRowBuffer createRowBuffer( String fullyQualifiedChannelName, Consumer rowSizeMetric, ChannelRuntimeState channelRuntimeState, - boolean enableParquetMemoryOptimization) { + boolean enableParquetMemoryOptimization, + long maxChunkSizeInBytes) { switch (bdecVersion) { case ONE: //noinspection unchecked @@ -581,7 +582,8 @@ static AbstractRowBuffer createRowBuffer( fullyQualifiedChannelName, rowSizeMetric, channelRuntimeState, - enableParquetMemoryOptimization); + enableParquetMemoryOptimization, + maxChunkSizeInBytes); default: throw new SFException( ErrorCode.INTERNAL_ERROR, "Unsupported BDEC format version: " + bdecVersion); diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java index 0ac28326d..b0c14b12f 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java @@ -25,14 +25,17 @@ public class ParquetFlusher implements Flusher { private static final Logging logger = new Logging(ParquetFlusher.class); private final MessageType schema; private final boolean enableParquetInternalBuffering; + private final long maxChunkSizeInBytes; /** * Construct parquet flusher from its schema and set flag that indicates whether Parquet memory * optimization is enabled, i.e. rows will be buffered in internal Parquet buffer. 
*/ - public ParquetFlusher(MessageType schema, boolean enableParquetInternalBuffering) { + public ParquetFlusher( + MessageType schema, boolean enableParquetInternalBuffering, long maxChunkSizeInBytes) { this.schema = schema; this.enableParquetInternalBuffering = enableParquetInternalBuffering; + this.maxChunkSizeInBytes = maxChunkSizeInBytes; } @Override @@ -194,7 +197,8 @@ private SerializationResult serializeFromJavaObjects( Map metadata = channelsDataPerTable.get(0).getVectors().metadata; parquetWriter = - new BdecParquetWriter(mergedData, schema, metadata, firstChannelFullyQualifiedTableName); + new BdecParquetWriter( + mergedData, schema, metadata, firstChannelFullyQualifiedTableName, maxChunkSizeInBytes); rows.forEach(parquetWriter::writeRow); parquetWriter.close(); diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java index a45a52b93..73774d080 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java @@ -52,6 +52,8 @@ public class ParquetRowBuffer extends AbstractRowBuffer { private MessageType schema; private final boolean enableParquetInternalBuffering; + private final long maxChunkSizeInBytes; + /** Construct a ParquetRowBuffer object. */ ParquetRowBuffer( OpenChannelRequest.OnErrorOption onErrorOption, @@ -60,7 +62,8 @@ public class ParquetRowBuffer extends AbstractRowBuffer { String fullyQualifiedChannelName, Consumer rowSizeMetric, ChannelRuntimeState channelRuntimeState, - boolean enableParquetInternalBuffering) { + boolean enableParquetInternalBuffering, + long maxChunkSizeInBytes) { super( onErrorOption, defaultTimezone, @@ -68,12 +71,13 @@ public class ParquetRowBuffer extends AbstractRowBuffer { fullyQualifiedChannelName, rowSizeMetric, channelRuntimeState); - fieldIndex = new HashMap<>(); - metadata = new HashMap<>(); - data = new ArrayList<>(); - tempData = new ArrayList<>(); - channelName = fullyQualifiedChannelName; + this.fieldIndex = new HashMap<>(); + this.metadata = new HashMap<>(); + this.data = new ArrayList<>(); + this.tempData = new ArrayList<>(); + this.channelName = fullyQualifiedChannelName; this.enableParquetInternalBuffering = enableParquetInternalBuffering; + this.maxChunkSizeInBytes = maxChunkSizeInBytes; } @Override @@ -117,7 +121,8 @@ private void createFileWriter() { fileOutput = new ByteArrayOutputStream(); try { if (enableParquetInternalBuffering) { - bdecParquetWriter = new BdecParquetWriter(fileOutput, schema, metadata, channelName); + bdecParquetWriter = + new BdecParquetWriter(fileOutput, schema, metadata, channelName, maxChunkSizeInBytes); } else { this.bdecParquetWriter = null; } @@ -305,7 +310,7 @@ void closeInternal() { @Override public Flusher createFlusher() { - return new ParquetFlusher(schema, enableParquetInternalBuffering); + return new ParquetFlusher(schema, enableParquetInternalBuffering, maxChunkSizeInBytes); } private static class ParquetColumn { diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java b/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java index 3916bc0d9..f4c4c6952 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java @@ -5,7 +5,6 
@@ package net.snowflake.ingest.streaming.internal; import static net.snowflake.ingest.utils.Constants.INSERT_THROTTLE_MAX_RETRY_COUNT; -import static net.snowflake.ingest.utils.Constants.MAX_CHUNK_SIZE_IN_BYTES; import static net.snowflake.ingest.utils.Constants.RESPONSE_SUCCESS; import static net.snowflake.ingest.utils.ParameterProvider.MAX_MEMORY_LIMIT_IN_BYTES_DEFAULT; @@ -131,7 +130,10 @@ class SnowflakeStreamingIngestChannelInternal implements SnowflakeStreamingIn channelState, owningClient != null ? owningClient.getParameterProvider().getEnableParquetInternalBuffering() - : ParameterProvider.ENABLE_PARQUET_INTERNAL_BUFFERING_DEFAULT); + : ParameterProvider.ENABLE_PARQUET_INTERNAL_BUFFERING_DEFAULT, + owningClient != null + ? owningClient.getParameterProvider().getMaxChunkSizeInBytes() + : ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_DEFAULT); logger.logInfo( "Channel={} created for table={}", this.channelFlushContext.getName(), @@ -365,7 +367,8 @@ public InsertValidationResponse insertRows( // Start flush task if the chunk size reaches a certain size // TODO: Checking table/chunk level size reduces throughput a lot, we may want to check it only // if a large number of rows are inserted - if (this.rowBuffer.getSize() >= MAX_CHUNK_SIZE_IN_BYTES) { + if (this.rowBuffer.getSize() + >= this.owningClient.getParameterProvider().getMaxChunkSizeInBytes()) { this.owningClient.setNeedFlush(); } diff --git a/src/main/java/net/snowflake/ingest/utils/Constants.java b/src/main/java/net/snowflake/ingest/utils/Constants.java index e4e399978..050f14264 100644 --- a/src/main/java/net/snowflake/ingest/utils/Constants.java +++ b/src/main/java/net/snowflake/ingest/utils/Constants.java @@ -34,7 +34,6 @@ public class Constants { public static final int BLOB_UPLOAD_TIMEOUT_IN_SEC = 5; public static final int INSERT_THROTTLE_MAX_RETRY_COUNT = 60; public static final long MAX_BLOB_SIZE_IN_BYTES = 256000000L; - public static final long MAX_CHUNK_SIZE_IN_BYTES = 16000000L; public static final int BLOB_TAG_SIZE_IN_BYTES = 4; public static final int BLOB_VERSION_SIZE_IN_BYTES = 1; public static final int BLOB_FILE_SIZE_SIZE_IN_BYTES = 8; diff --git a/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java b/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java index 11f9fbeac..e2f38f476 100644 --- a/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java +++ b/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java @@ -16,7 +16,6 @@ public class ParameterProvider { "STREAMING_INGEST_CLIENT_SDK_INSERT_THROTTLE_THRESHOLD_IN_PERCENTAGE".toLowerCase(); public static final String INSERT_THROTTLE_THRESHOLD_IN_BYTES = "STREAMING_INGEST_CLIENT_SDK_INSERT_THROTTLE_THRESHOLD_IN_BYTES".toLowerCase(); - public static final String ENABLE_SNOWPIPE_STREAMING_METRICS = "ENABLE_SNOWPIPE_STREAMING_JMX_METRICS".toLowerCase(); public static final String BLOB_FORMAT_VERSION = "BLOB_FORMAT_VERSION".toLowerCase(); @@ -26,6 +25,7 @@ public class ParameterProvider { public static final String MAX_MEMORY_LIMIT_IN_BYTES = "MAX_MEMORY_LIMIT_IN_BYTES".toLowerCase(); public static final String ENABLE_PARQUET_INTERNAL_BUFFERING = "ENABLE_PARQUET_INTERNAL_BUFFERING".toLowerCase(); + public static final String MAX_CHUNK_SIZE_IN_BYTES = "MAX_CHUNK_SIZE_IN_BYTES".toLowerCase(); // Default values public static final long BUFFER_FLUSH_INTERVAL_IN_MILLIS_DEFAULT = 1000; @@ -39,6 +39,7 @@ public class ParameterProvider { public static final int IO_TIME_CPU_RATIO_DEFAULT = 2; public static final int 
BLOB_UPLOAD_MAX_RETRY_COUNT_DEFAULT = 24; public static final long MAX_MEMORY_LIMIT_IN_BYTES_DEFAULT = -1L; + public static final long MAX_CHUNK_SIZE_IN_BYTES_DEFAULT = 32000000L; /* Parameter that enables using internal Parquet buffers for buffering of rows before serializing. It reduces memory consumption compared to using Java Objects for buffering.*/ @@ -136,6 +137,9 @@ private void setParameterMap(Map parameterOverrides, Properties ENABLE_PARQUET_INTERNAL_BUFFERING_DEFAULT, parameterOverrides, props); + + this.updateValue( + MAX_CHUNK_SIZE_IN_BYTES, MAX_CHUNK_SIZE_IN_BYTES_DEFAULT, parameterOverrides, props); } /** @return Longest interval in milliseconds between buffer flushes */ @@ -261,6 +265,13 @@ public boolean getEnableParquetInternalBuffering() { return (val instanceof String) ? Boolean.parseBoolean(val.toString()) : (boolean) val; } + /** @return The max chunk size in bytes */ + public long getMaxChunkSizeInBytes() { + Object val = + this.parameterMap.getOrDefault(MAX_CHUNK_SIZE_IN_BYTES, MAX_CHUNK_SIZE_IN_BYTES_DEFAULT); + return (val instanceof String) ? Long.parseLong(val.toString()) : (long) val; + } + @Override public String toString() { return "ParameterProvider{" + "parameterMap=" + parameterMap + '}'; diff --git a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java index 9122e20be..55ab1bf30 100644 --- a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java +++ b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java @@ -4,8 +4,6 @@ package org.apache.parquet.hadoop; -import static net.snowflake.ingest.utils.Constants.MAX_CHUNK_SIZE_IN_BYTES; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.List; @@ -52,9 +50,10 @@ public BdecParquetWriter( ByteArrayOutputStream stream, MessageType schema, Map extraMetaData, - String channelName) + String channelName, + long maxChunkSizeInBytes) throws IOException { - OutputFile file = new ByteArrayOutputFile(stream); + OutputFile file = new ByteArrayOutputFile(stream, maxChunkSizeInBytes); ParquetProperties encodingProps = createParquetProperties(); Configuration conf = new Configuration(); WriteSupport> writeSupport = @@ -166,9 +165,11 @@ private static ParquetProperties createParquetProperties() { */ private static class ByteArrayOutputFile implements OutputFile { private final ByteArrayOutputStream stream; + private final long maxChunkSizeInBytes; - private ByteArrayOutputFile(ByteArrayOutputStream stream) { + private ByteArrayOutputFile(ByteArrayOutputStream stream, long maxChunkSizeInBytes) { this.stream = stream; + this.maxChunkSizeInBytes = maxChunkSizeInBytes; } @Override @@ -189,7 +190,7 @@ public boolean supportsBlockSize() { @Override public long defaultBlockSize() { - return (int) MAX_CHUNK_SIZE_IN_BYTES; + return maxChunkSizeInBytes; } } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java index f7f8da84f..1fe034635 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java @@ -21,6 +21,7 @@ public void withValuesSet() { parameterMap.put(ParameterProvider.IO_TIME_CPU_RATIO, 10); parameterMap.put(ParameterProvider.BLOB_UPLOAD_MAX_RETRY_COUNT, 100); parameterMap.put(ParameterProvider.MAX_MEMORY_LIMIT_IN_BYTES, 1000L); + 
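/*
 * Illustrative sketch (not part of the patch): the override-with-default lookup pattern that
 * the new MAX_CHUNK_SIZE_IN_BYTES parameter follows. The class and key handling below are
 * stand-ins; the real provider also merges values from a Properties object.
 */
import java.util.HashMap;
import java.util.Map;

class ParameterLookupSketch {
  private final Map<String, Object> parameterMap = new HashMap<>();

  ParameterLookupSketch(Map<String, Object> overrides) {
    if (overrides != null) {
      parameterMap.putAll(overrides);
    }
  }

  /** Overrides may arrive as typed values or as strings (for example from a properties file). */
  long getLong(String key, long defaultValue) {
    Object val = parameterMap.getOrDefault(key, defaultValue);
    return (val instanceof String) ? Long.parseLong((String) val) : (long) val;
  }
}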
parameterMap.put(ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES, 1000000L); ParameterProvider parameterProvider = new ParameterProvider(parameterMap, prop); Assert.assertEquals(3L, parameterProvider.getBufferFlushIntervalInMs()); @@ -31,6 +32,7 @@ public void withValuesSet() { Assert.assertEquals(10, parameterProvider.getIOTimeCpuRatio()); Assert.assertEquals(100, parameterProvider.getBlobUploadMaxRetryCount()); Assert.assertEquals(1000L, parameterProvider.getMaxMemoryLimitInBytes()); + Assert.assertEquals(1000000L, parameterProvider.getMaxChunkSizeInBytes()); } @Test @@ -117,5 +119,8 @@ public void withDefaultValues() { Assert.assertEquals( ParameterProvider.MAX_MEMORY_LIMIT_IN_BYTES_DEFAULT, parameterProvider.getMaxMemoryLimitInBytes()); + Assert.assertEquals( + ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_DEFAULT, + parameterProvider.getMaxChunkSizeInBytes()); } } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java index 55df1eb21..b6143e6ce 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java @@ -1,6 +1,7 @@ package net.snowflake.ingest.streaming.internal; import static java.time.ZoneOffset.UTC; +import static net.snowflake.ingest.utils.ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_DEFAULT; import java.math.BigDecimal; import java.math.BigInteger; @@ -131,7 +132,8 @@ private AbstractRowBuffer createTestBuffer(OpenChannelRequest.OnErrorOption o "test.buffer", rs -> {}, initialState, - enableParquetMemoryOptimization); + enableParquetMemoryOptimization, + MAX_CHUNK_SIZE_IN_BYTES_DEFAULT); } @Test From e43c354901b1385be6f73f1a832516c838f8d784 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 17 May 2023 23:04:44 -0700 Subject: [PATCH 17/29] fix schema issue --- .../streaming/internal/FlushService.java | 35 ++++--- .../streaming/internal/FlushServiceTest.java | 94 +++++++++++++++---- 2 files changed, 100 insertions(+), 29 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 7758e30e6..39e17381c 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -373,22 +373,16 @@ void distributeFlushTasks() { int idx = 0; while (idx < channelsDataPerTable.size()) { ChannelData channelData = channelsDataPerTable.get(idx); - // Stop processing the rest of channels if reaching the blob size limit or the channel - // has different encryption key ids + // Stop processing the rest of channels when needed if (idx > 0 - && (totalBufferSizeInBytes + channelData.getBufferSize() > MAX_BLOB_SIZE_IN_BYTES - || !Objects.equals( - channelData.getChannelContext().getEncryptionKeyId(), - channelsDataPerTable - .get(idx - 1) - .getChannelContext() - .getEncryptionKeyId()))) { + && shouldStopProcessing( + totalBufferSizeInBytes, channelData, channelsDataPerTable.get(idx - 1))) { leftoverChannelsDataPerTable.addAll( channelsDataPerTable.subList(idx, channelsDataPerTable.size())); logger.logInfo( "Creation of another blob is needed because of blob size limit or different" - + " encryption ids, client={}, table={}, size={}, encryptionId1={}," - + " encryptionId2={}", + + " encryption ids or different schema, client={}, table={}, size={}," + + " encryptionId1={}, encryptionId2={}", 
this.owningClient.getName(), channelData.getChannelContext().getTableName(), totalBufferSizeInBytes + channelData.getBufferSize(), @@ -471,6 +465,25 @@ void distributeFlushTasks() { this.registerService.addBlobs(blobs); } + /** + * Check whether we should stop merging more channels into the chunks, we need to stop in a few + * cases + * + *
<li>
When the size is larger than a certain threshold + * + *
<li>
When the encryption key ids are not the same + * + *
<li>
When the schema is not the same + */ + private boolean shouldStopProcessing( + float totalBufferSizeInBytes, ChannelData current, ChannelData prev) { + return totalBufferSizeInBytes + current.getBufferSize() > MAX_BLOB_SIZE_IN_BYTES + || !Objects.equals( + current.getChannelContext().getEncryptionKeyId(), + prev.getChannelContext().getEncryptionKeyId()) + || !current.getColumnEps().keySet().equals(prev.getColumnEps().keySet()); + } + /** * Builds and uploads file to cloud storage. * diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java index 0c485b94b..c75b16748 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java @@ -396,9 +396,9 @@ private SnowflakeStreamingIngestChannelInternal addChannel4(TestContext te .buildAndAdd(); } - private static ColumnMetadata createTestIntegerColumn() { + private static ColumnMetadata createTestIntegerColumn(String name) { ColumnMetadata colInt = new ColumnMetadata(); - colInt.setName("COLINT"); + colInt.setName(name); colInt.setPhysicalType("SB4"); colInt.setNullable(true); colInt.setLogicalType("FIXED"); @@ -407,9 +407,9 @@ private static ColumnMetadata createTestIntegerColumn() { return colInt; } - private static ColumnMetadata createTestTextColumn() { + private static ColumnMetadata createTestTextColumn(String name) { ColumnMetadata colChar = new ColumnMetadata(); - colChar.setName("COLCHAR"); + colChar.setName(name); colChar.setPhysicalType("LOB"); colChar.setNullable(true); colChar.setLogicalType("TEXT"); @@ -486,19 +486,22 @@ public void testBlobCreation() throws Exception { SnowflakeStreamingIngestChannelInternal channel1 = addChannel1(testContext); SnowflakeStreamingIngestChannelInternal channel2 = addChannel2(testContext); SnowflakeStreamingIngestChannelInternal channel4 = addChannel4(testContext); + String colName1 = "testBlobCreation1"; + String colName2 = "testBlobCreation2"; - List schema = Arrays.asList(createTestIntegerColumn(), createTestTextColumn()); + List schema = + Arrays.asList(createTestIntegerColumn(colName1), createTestTextColumn(colName2)); channel1.getRowBuffer().setupSchema(schema); channel2.getRowBuffer().setupSchema(schema); channel4.getRowBuffer().setupSchema(schema); List> rows1 = RowSetBuilder.newBuilder() - .addColumn("COLINT", 11) - .addColumn("COLCHAR", "bob") + .addColumn(colName1, 11) + .addColumn(colName2, "bob") .newRow() - .addColumn("COLINT", 22) - .addColumn("COLCHAR", "bob") + .addColumn(colName1, 22) + .addColumn(colName2, "bob") .build(); channel1.insertRows(rows1, "offset1"); @@ -512,26 +515,78 @@ public void testBlobCreation() throws Exception { Mockito.verify(flushService, Mockito.atLeast(2)).buildAndUpload(Mockito.any(), Mockito.any()); } + @Test + public void testBlobSplitDueToDifferentSchema() throws Exception { + TestContext testContext = testContextFactory.create(); + SnowflakeStreamingIngestChannelInternal channel1 = addChannel1(testContext); + SnowflakeStreamingIngestChannelInternal channel2 = addChannel2(testContext); + String colName1 = "testBlobSplitDueToDifferentSchema1"; + String colName2 = "testBlobSplitDueToDifferentSchema2"; + String colName3 = "testBlobSplitDueToDifferentSchema3"; + + List schema1 = + Arrays.asList(createTestIntegerColumn(colName1), createTestTextColumn(colName2)); + List schema2 = + Arrays.asList( + createTestIntegerColumn(colName1), + 
createTestTextColumn(colName2), + createTestIntegerColumn(colName3)); + channel1.getRowBuffer().setupSchema(schema1); + channel2.getRowBuffer().setupSchema(schema2); + + List> rows1 = + RowSetBuilder.newBuilder() + .addColumn(colName1, 11) + .addColumn(colName2, "bob") + .newRow() + .addColumn(colName1, 22) + .addColumn(colName2, "bob") + .build(); + + List> rows2 = + RowSetBuilder.newBuilder() + .addColumn(colName1, 11) + .addColumn(colName2, "bob") + .addColumn(colName3, 11) + .newRow() + .addColumn(colName1, 22) + .addColumn(colName2, "bob") + .addColumn(colName3, 22) + .build(); + + channel1.insertRows(rows1, "offset1"); + channel2.insertRows(rows2, "offset2"); + + FlushService flushService = testContext.flushService; + + // Force = true flushes + flushService.flush(true).get(); + Mockito.verify(flushService, Mockito.atLeast(2)).buildAndUpload(Mockito.any(), Mockito.any()); + } + @Test public void testBuildAndUpload() throws Exception { TestContext testContext = testContextFactory.create(); SnowflakeStreamingIngestChannelInternal channel1 = addChannel1(testContext); SnowflakeStreamingIngestChannelInternal channel2 = addChannel2(testContext); + String colName1 = "testBuildAndUpload1"; + String colName2 = "testBuildAndUpload2"; - List schema = Arrays.asList(createTestIntegerColumn(), createTestTextColumn()); + List schema = + Arrays.asList(createTestIntegerColumn(colName1), createTestTextColumn(colName2)); channel1.getRowBuffer().setupSchema(schema); channel2.getRowBuffer().setupSchema(schema); List> rows1 = RowSetBuilder.newBuilder() - .addColumn("COLINT", 11) - .addColumn("COLCHAR", "bob") + .addColumn(colName1, 11) + .addColumn(colName2, "bob") .newRow() - .addColumn("COLINT", 22) - .addColumn("COLCHAR", "bob") + .addColumn(colName1, 22) + .addColumn(colName2, "bob") .build(); List> rows2 = - RowSetBuilder.newBuilder().addColumn("COLINT", null).addColumn("COLCHAR", "toby").build(); + RowSetBuilder.newBuilder().addColumn(colName1, null).addColumn(colName2, "toby").build(); channel1.insertRows(rows1, "offset1"); channel2.insertRows(rows2, "offset2"); @@ -647,15 +702,18 @@ public void testBuildErrors() throws Exception { TestContext testContext = testContextFactory.create(); SnowflakeStreamingIngestChannelInternal channel1 = addChannel1(testContext); SnowflakeStreamingIngestChannelInternal channel3 = addChannel3(testContext); + String colName1 = "testBuildErrors1"; + String colName2 = "testBuildErrors2"; - List schema = Arrays.asList(createTestIntegerColumn(), createTestTextColumn()); + List schema = + Arrays.asList(createTestIntegerColumn(colName1), createTestTextColumn(colName2)); channel1.getRowBuffer().setupSchema(schema); channel3.getRowBuffer().setupSchema(schema); List> rows1 = - RowSetBuilder.newBuilder().addColumn("COLINT", 0).addColumn("COLCHAR", "alice").build(); + RowSetBuilder.newBuilder().addColumn(colName1, 0).addColumn(colName2, "alice").build(); List> rows2 = - RowSetBuilder.newBuilder().addColumn("COLINT", 0).addColumn("COLCHAR", 111).build(); + RowSetBuilder.newBuilder().addColumn(colName1, 0).addColumn(colName2, 111).build(); channel1.insertRows(rows1, "offset1"); channel3.insertRows(rows2, "offset2"); From e016947afa38d2abab11a1106234db8fe6dfc99a Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 17 May 2023 23:10:51 -0700 Subject: [PATCH 18/29] fix --- .../ingest/streaming/internal/FlushService.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java 
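/*
 * Illustrative sketch (not part of the patch): a toy version of the grouping rule introduced
 * here — channel data is packed greedily into a blob until the size cap is reached or the next
 * channel has a different encryption key id or column set, at which point a new blob starts.
 * ChannelStub and the size constant are invented for the example; only the split conditions
 * mirror shouldStopProcessing.
 */
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;

class BlobSplitSketch {
  static final long MAX_BLOB_SIZE = 256_000_000L;

  static class ChannelStub {
    final long bufferSize;
    final Long encryptionKeyId;
    final Set<String> columnNames;

    ChannelStub(long bufferSize, Long encryptionKeyId, Set<String> columnNames) {
      this.bufferSize = bufferSize;
      this.encryptionKeyId = encryptionKeyId;
      this.columnNames = columnNames;
    }
  }

  /** Greedily packs channels in order; starts a new group whenever the split rule fires. */
  static List<List<ChannelStub>> group(List<ChannelStub> channels) {
    List<List<ChannelStub>> groups = new ArrayList<>();
    List<ChannelStub> current = new ArrayList<>();
    long size = 0;
    for (ChannelStub c : channels) {
      boolean split =
          !current.isEmpty()
              && (size + c.bufferSize > MAX_BLOB_SIZE
                  || !Objects.equals(c.encryptionKeyId, last(current).encryptionKeyId)
                  || !c.columnNames.equals(last(current).columnNames));
      if (split) {
        groups.add(current);
        current = new ArrayList<>();
        size = 0;
      }
      current.add(c);
      size += c.bufferSize;
    }
    if (!current.isEmpty()) {
      groups.add(current);
    }
    return groups;
  }

  private static ChannelStub last(List<ChannelStub> list) {
    return list.get(list.size() - 1);
  }
}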
b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 39e17381c..45a65b75e 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -382,12 +382,14 @@ && shouldStopProcessing( logger.logInfo( "Creation of another blob is needed because of blob size limit or different" + " encryption ids or different schema, client={}, table={}, size={}," - + " encryptionId1={}, encryptionId2={}", + + " encryptionId1={}, encryptionId2={}, schema1={}, schema2={}", this.owningClient.getName(), channelData.getChannelContext().getTableName(), totalBufferSizeInBytes + channelData.getBufferSize(), channelData.getChannelContext().getEncryptionKeyId(), - channelsDataPerTable.get(idx - 1).getChannelContext().getEncryptionKeyId()); + channelsDataPerTable.get(idx - 1).getChannelContext().getEncryptionKeyId(), + channelData.getColumnEps().keySet(), + channelsDataPerTable.get(idx - 1).getColumnEps().keySet()); break; } totalBufferSizeInBytes += channelData.getBufferSize(); @@ -466,14 +468,14 @@ && shouldStopProcessing( } /** - * Check whether we should stop merging more channels into the chunks, we need to stop in a few - * cases + * Check whether we should stop merging more channels into the same chunk, we need to stop in a + * few cases * *
<li>
When the size is larger than a certain threshold * *
<li>
When the encryption key ids are not the same * - *
<li>
When the schema is not the same + *
<li>
When the schemas are not the same */ private boolean shouldStopProcessing( float totalBufferSizeInBytes, ChannelData current, ChannelData prev) { From 12f9eb288037f435907728ef4548b31091891d4c Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Thu, 18 May 2023 11:48:58 -0700 Subject: [PATCH 19/29] address comment --- .../net/snowflake/ingest/streaming/internal/FlushService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 45a65b75e..591c1db85 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -469,7 +469,7 @@ && shouldStopProcessing( /** * Check whether we should stop merging more channels into the same chunk, we need to stop in a - * few cases + * few cases: * *
<li>
When the size is larger than a certain threshold * From ab0620f1729c0ec6771b669bc9aaa7668d623b00 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Sat, 3 Jun 2023 23:10:12 -0700 Subject: [PATCH 20/29] save progress --- .../streaming/internal/FlushService.java | 29 ++++++++--- .../streaming/internal/FlushServiceTest.java | 51 +++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index ae4d04a23..74d53f569 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -370,21 +370,28 @@ void distributeFlushTasks() { if (!channelsDataPerTable.isEmpty()) { int idx = 0; + float totalBufferSizePerTableInBytes = 0F; while (idx < channelsDataPerTable.size()) { ChannelData channelData = channelsDataPerTable.get(idx); // Stop processing the rest of channels when needed if (idx > 0 && shouldStopProcessing( - totalBufferSizeInBytes, channelData, channelsDataPerTable.get(idx - 1))) { + totalBufferSizeInBytes, + totalBufferSizePerTableInBytes, + channelData, + channelsDataPerTable.get(idx - 1))) { leftoverChannelsDataPerTable.addAll( channelsDataPerTable.subList(idx, channelsDataPerTable.size())); logger.logInfo( - "Creation of another blob is needed because of blob size limit or different" - + " encryption ids or different schema, client={}, table={}, size={}," - + " encryptionId1={}, encryptionId2={}, schema1={}, schema2={}", + "Creation of another blob is needed because of blob/chunk size limit or" + + " different encryption ids or different schema, client={}, table={}," + + " fileSize={}, chunkSize={}, nextChannelSize={}, encryptionId1={}," + + " encryptionId2={}, schema1={}, schema2={}", this.owningClient.getName(), channelData.getChannelContext().getTableName(), - totalBufferSizeInBytes + channelData.getBufferSize(), + totalBufferSizeInBytes, + totalBufferSizePerTableInBytes, + channelData.getBufferSize(), channelData.getChannelContext().getEncryptionKeyId(), channelsDataPerTable.get(idx - 1).getChannelContext().getEncryptionKeyId(), channelData.getColumnEps().keySet(), @@ -392,6 +399,7 @@ && shouldStopProcessing( break; } totalBufferSizeInBytes += channelData.getBufferSize(); + totalBufferSizePerTableInBytes += channelData.getBufferSize(); idx++; } // Add processed channels to the current blob, stop if we need to create a new blob @@ -473,15 +481,22 @@ && shouldStopProcessing( * Check whether we should stop merging more channels into the same chunk, we need to stop in a * few cases: * - *
<li>
When the size is larger than a certain threshold + *
<li>
When the file size is larger than a certain threshold + * + *
<li>
When the chunk size is larger than a certain threshold * *
<li>
When the encryption key ids are not the same * *
<li>
When the schemas are not the same */ private boolean shouldStopProcessing( - float totalBufferSizeInBytes, ChannelData current, ChannelData prev) { + float totalBufferSizeInBytes, + float totalBufferSizePerTableInBytes, + ChannelData current, + ChannelData prev) { return totalBufferSizeInBytes + current.getBufferSize() > MAX_BLOB_SIZE_IN_BYTES + || totalBufferSizePerTableInBytes + current.getBufferSize() + > this.owningClient.getParameterProvider().getMaxChunkSizeInBytes() || !Objects.equals( current.getChannelContext().getEncryptionKeyId(), prev.getChannelContext().getEncryptionKeyId()) diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java index fc049c297..39632d0a7 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java @@ -360,6 +360,18 @@ private static ColumnMetadata createTestTextColumn(String name) { return colChar; } + private static ColumnMetadata createLargeTestTextColumn(String name) { + ColumnMetadata colChar = new ColumnMetadata(); + colChar.setName(name); + colChar.setPhysicalType("LOB"); + colChar.setNullable(true); + colChar.setLogicalType("TEXT"); + colChar.setByteLength(14000000); + colChar.setLength(11000000); + colChar.setScale(0); + return colChar; + } + @Test public void testGetFilePath() { TestContext testContext = testContextFactory.create(); @@ -505,6 +517,45 @@ public void testBlobSplitDueToDifferentSchema() throws Exception { Mockito.verify(flushService, Mockito.atLeast(2)).buildAndUpload(Mockito.any(), Mockito.any()); } + @Test + public void testBlobSplitDueToChunkSizeLimit() throws Exception { + TestContext testContext = testContextFactory.create(); + SnowflakeStreamingIngestChannelInternal channel1 = addChannel1(testContext); + SnowflakeStreamingIngestChannelInternal channel2 = addChannel2(testContext); + String colName1 = "testBlobSplitDueToDifferentSchema1"; + String colName2 = "testBlobSplitDueToDifferentSchema2"; + String largeData = new String(new char[10000000]); + + List schema = + Arrays.asList(createTestIntegerColumn(colName1), createLargeTestTextColumn(colName2)); + channel1.getRowBuffer().setupSchema(schema); + channel2.getRowBuffer().setupSchema(schema); + + List> rows = + RowSetBuilder.newBuilder() + .addColumn(colName1, 11) + .addColumn(colName2, largeData) + .newRow() + .addColumn(colName1, 22) + .addColumn(colName2, largeData) + .newRow() + .addColumn(colName1, 33) + .addColumn(colName2, largeData) + .newRow() + .addColumn(colName1, 44) + .addColumn(colName2, largeData) + .build(); + + channel1.insertRows(rows, "offset1"); + channel2.insertRows(rows, "offset2"); + + FlushService flushService = testContext.flushService; + + // Force = true flushes + flushService.flush(true).get(); + Mockito.verify(flushService, Mockito.atLeast(2)).buildAndUpload(Mockito.any(), Mockito.any()); + } + @Test public void testBuildAndUpload() throws Exception { long expectedBuildLatencyMs = 100; From eaa729a6254b61d3d86c59caa37c6b7885ac46b3 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Mon, 5 Jun 2023 00:34:07 -0700 Subject: [PATCH 21/29] add tests --- .../streaming/internal/FlushService.java | 2 +- .../ingest/utils/ParameterProvider.java | 19 ++++++++++++ .../streaming/internal/FlushServiceTest.java | 30 ++++++++----------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git 
a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 74d53f569..6fe196680 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -496,7 +496,7 @@ private boolean shouldStopProcessing( ChannelData prev) { return totalBufferSizeInBytes + current.getBufferSize() > MAX_BLOB_SIZE_IN_BYTES || totalBufferSizePerTableInBytes + current.getBufferSize() - > this.owningClient.getParameterProvider().getMaxChunkSizeInBytes() + > this.owningClient.getParameterProvider().getMaxChunkSizeInBytesToAvoidOom() || !Objects.equals( current.getChannelContext().getEncryptionKeyId(), prev.getChannelContext().getEncryptionKeyId()) diff --git a/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java b/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java index 7faea2859..e07cfaf1b 100644 --- a/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java +++ b/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java @@ -25,7 +25,11 @@ public class ParameterProvider { public static final String MAX_MEMORY_LIMIT_IN_BYTES = "MAX_MEMORY_LIMIT_IN_BYTES".toLowerCase(); public static final String ENABLE_PARQUET_INTERNAL_BUFFERING = "ENABLE_PARQUET_INTERNAL_BUFFERING".toLowerCase(); + // This is actually channel size limit at this moment until we implement the size tracking logic + // at table/chunk level public static final String MAX_CHUNK_SIZE_IN_BYTES = "MAX_CHUNK_SIZE_IN_BYTES".toLowerCase(); + public static final String MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM = + "MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM".toLowerCase(); // Default values public static final long BUFFER_FLUSH_INTERVAL_IN_MILLIS_DEFAULT = 1000; @@ -40,6 +44,7 @@ public class ParameterProvider { public static final int BLOB_UPLOAD_MAX_RETRY_COUNT_DEFAULT = 24; public static final long MAX_MEMORY_LIMIT_IN_BYTES_DEFAULT = -1L; public static final long MAX_CHUNK_SIZE_IN_BYTES_DEFAULT = 32000000L; + public static final long MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT = 128000000L; /* Parameter that enables using internal Parquet buffers for buffering of rows before serializing. It reduces memory consumption compared to using Java Objects for buffering.*/ @@ -140,6 +145,12 @@ private void setParameterMap(Map parameterOverrides, Properties this.updateValue( MAX_CHUNK_SIZE_IN_BYTES, MAX_CHUNK_SIZE_IN_BYTES_DEFAULT, parameterOverrides, props); + + this.updateValue( + MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM, + MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT, + parameterOverrides, + props); } /** @return Longest interval in milliseconds between buffer flushes */ @@ -272,6 +283,14 @@ public long getMaxChunkSizeInBytes() { return (val instanceof String) ? Long.parseLong(val.toString()) : (long) val; } + /** @return The max chunk size in bytes that could avoid OOM at server side */ + public long getMaxChunkSizeInBytesToAvoidOom() { + Object val = + this.parameterMap.getOrDefault( + MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM, MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT); + return (val instanceof String) ? 
Long.parseLong(val.toString()) : (long) val; + } + @Override public String toString() { return "ParameterProvider{" + "parameterMap=" + parameterMap + '}'; diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java index 39632d0a7..99669eb03 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java @@ -7,6 +7,7 @@ import static net.snowflake.ingest.utils.Constants.BLOB_NO_HEADER; import static net.snowflake.ingest.utils.Constants.BLOB_TAG_SIZE_IN_BYTES; import static net.snowflake.ingest.utils.Constants.BLOB_VERSION_SIZE_IN_BYTES; +import static net.snowflake.ingest.utils.ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT; import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; @@ -522,29 +523,24 @@ public void testBlobSplitDueToChunkSizeLimit() throws Exception { TestContext testContext = testContextFactory.create(); SnowflakeStreamingIngestChannelInternal channel1 = addChannel1(testContext); SnowflakeStreamingIngestChannelInternal channel2 = addChannel2(testContext); - String colName1 = "testBlobSplitDueToDifferentSchema1"; - String colName2 = "testBlobSplitDueToDifferentSchema2"; - String largeData = new String(new char[10000000]); + String colName1 = "testBlobSplitDueToChunkSizeLimit1"; + String colName2 = "testBlobSplitDueToChunkSizeLimit2"; + int rowSize = 10000000; + String largeData = new String(new char[rowSize]); List schema = Arrays.asList(createTestIntegerColumn(colName1), createLargeTestTextColumn(colName2)); channel1.getRowBuffer().setupSchema(schema); channel2.getRowBuffer().setupSchema(schema); - List> rows = - RowSetBuilder.newBuilder() - .addColumn(colName1, 11) - .addColumn(colName2, largeData) - .newRow() - .addColumn(colName1, 22) - .addColumn(colName2, largeData) - .newRow() - .addColumn(colName1, 33) - .addColumn(colName2, largeData) - .newRow() - .addColumn(colName1, 44) - .addColumn(colName2, largeData) - .build(); + RowSetBuilder builder = RowSetBuilder.newBuilder(); + RowSetBuilder.newBuilder().addColumn(colName1, 11).addColumn(colName2, largeData); + + for (int idx = 0; idx <= MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT / (2 * rowSize); idx++) { + builder.addColumn(colName1, 11).addColumn(colName2, largeData).newRow(); + } + + List> rows = builder.build(); channel1.insertRows(rows, "offset1"); channel2.insertRows(rows, "offset2"); From f17ba1bde2943476a26d5e996ffa9e6e0ec3ed56 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Mon, 5 Jun 2023 18:40:55 -0700 Subject: [PATCH 22/29] fix tests --- .../snowflake/ingest/streaming/internal/FlushService.java | 7 ++++++- .../ingest/streaming/internal/FlushServiceTest.java | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index ff2c51e20..310fb1f9b 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -259,7 +259,7 @@ CompletableFuture flush(boolean isForce) { if (isForce || (!DISABLE_BACKGROUND_FLUSH - && !this.isTestMode + && !isTestMode() && (this.isNeedFlush || timeDiffMillis >= this.owningClient.getParameterProvider().getBufferFlushIntervalInMs()))) { @@ -673,4 +673,9 @@ boolean 
throttleDueToQueuedFlushTasks() { } return throttleOnQueuedTasks; } + + /** Get whether we're running under test mode */ + boolean isTestMode() { + return this.isTestMode; + } } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java index 99669eb03..aa8a3c8b6 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java @@ -93,7 +93,7 @@ private abstract static class TestContext implements AutoCloseable { channelCache = new ChannelCache<>(); Mockito.when(client.getChannelCache()).thenReturn(channelCache); registerService = Mockito.spy(new RegisterService(client, client.isTestMode())); - flushService = Mockito.spy(new FlushService<>(client, channelCache, stage, false)); + flushService = Mockito.spy(new FlushService<>(client, channelCache, stage, true)); } ChannelData flushChannel(String name) { @@ -411,6 +411,7 @@ public void testGetFilePath() { public void testFlush() throws Exception { TestContext testContext = testContextFactory.create(); FlushService flushService = testContext.flushService; + Mockito.when(flushService.isTestMode()).thenReturn(false); // Nothing to flush flushService.flush(false).get(); @@ -549,7 +550,7 @@ public void testBlobSplitDueToChunkSizeLimit() throws Exception { // Force = true flushes flushService.flush(true).get(); - Mockito.verify(flushService, Mockito.atLeast(2)).buildAndUpload(Mockito.any(), Mockito.any()); + Mockito.verify(flushService, Mockito.times(2)).buildAndUpload(Mockito.any(), Mockito.any()); } @Test From e925d83a596e74dd8cc7450b5d1df29f73be3e1e Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 7 Jun 2023 14:29:29 -0700 Subject: [PATCH 23/29] address comments --- .../streaming/internal/FlushService.java | 2 +- ...owflakeStreamingIngestChannelInternal.java | 6 ++-- .../ingest/utils/ParameterProvider.java | 29 ++++++++----------- .../streaming/internal/FlushServiceTest.java | 4 +-- .../internal/ParameterProviderTest.java | 8 ++--- .../streaming/internal/RowBufferTest.java | 4 +-- 6 files changed, 24 insertions(+), 29 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java index 310fb1f9b..86358b24d 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/FlushService.java @@ -496,7 +496,7 @@ private boolean shouldStopProcessing( ChannelData prev) { return totalBufferSizeInBytes + current.getBufferSize() > MAX_BLOB_SIZE_IN_BYTES || totalBufferSizePerTableInBytes + current.getBufferSize() - > this.owningClient.getParameterProvider().getMaxChunkSizeInBytesToAvoidOom() + > this.owningClient.getParameterProvider().getMaxChunkSizeInBytes() || !Objects.equals( current.getChannelContext().getEncryptionKeyId(), prev.getChannelContext().getEncryptionKeyId()) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java b/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java index b2d523a58..f5dfb73a6 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelInternal.java @@ -126,8 +126,8 @@ class 
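/*
 * Illustrative sketch (not part of the patch): the test seam used above — reading the flag
 * through a package-private getter so a Mockito spy can override it per test. The Service
 * class is a stand-in for FlushService, not SDK code.
 */
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

class TestModeSeamSketch {
  static class Service {
    private final boolean isTestMode;

    Service(boolean isTestMode) {
      this.isTestMode = isTestMode;
    }

    boolean isTestMode() {
      return isTestMode;
    }

    String flush() {
      // Going through the getter rather than the field is what makes the flag stubbable.
      return isTestMode() ? "skipped" : "flushed";
    }
  }

  static String forceRealFlushPath() {
    Service service = spy(new Service(true));
    when(service.isTestMode()).thenReturn(false); // override the flag for this one test
    return service.flush(); // "flushed"
  }
}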
SnowflakeStreamingIngestChannelInternal implements SnowflakeStreamingIn ? owningClient.getParameterProvider().getEnableParquetInternalBuffering() : ParameterProvider.ENABLE_PARQUET_INTERNAL_BUFFERING_DEFAULT, owningClient != null - ? owningClient.getParameterProvider().getMaxChunkSizeInBytes() - : ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_DEFAULT); + ? owningClient.getParameterProvider().getMaxChannelSizeInBytes() + : ParameterProvider.MAX_CHANNEL_SIZE_IN_BYTES_DEFAULT); logger.logInfo( "Channel={} created for table={}", this.channelFlushContext.getName(), @@ -362,7 +362,7 @@ public InsertValidationResponse insertRows( // TODO: Checking table/chunk level size reduces throughput a lot, we may want to check it only // if a large number of rows are inserted if (this.rowBuffer.getSize() - >= this.owningClient.getParameterProvider().getMaxChunkSizeInBytes()) { + >= this.owningClient.getParameterProvider().getMaxChannelSizeInBytes()) { this.owningClient.setNeedFlush(); } diff --git a/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java b/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java index e07cfaf1b..451682f52 100644 --- a/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java +++ b/src/main/java/net/snowflake/ingest/utils/ParameterProvider.java @@ -25,11 +25,9 @@ public class ParameterProvider { public static final String MAX_MEMORY_LIMIT_IN_BYTES = "MAX_MEMORY_LIMIT_IN_BYTES".toLowerCase(); public static final String ENABLE_PARQUET_INTERNAL_BUFFERING = "ENABLE_PARQUET_INTERNAL_BUFFERING".toLowerCase(); - // This is actually channel size limit at this moment until we implement the size tracking logic - // at table/chunk level + // This should not be needed once we have the ability to track size at table/chunk level + public static final String MAX_CHANNEL_SIZE_IN_BYTES = "MAX_CHANNEL_SIZE_IN_BYTES".toLowerCase(); public static final String MAX_CHUNK_SIZE_IN_BYTES = "MAX_CHUNK_SIZE_IN_BYTES".toLowerCase(); - public static final String MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM = - "MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM".toLowerCase(); // Default values public static final long BUFFER_FLUSH_INTERVAL_IN_MILLIS_DEFAULT = 1000; @@ -43,8 +41,8 @@ public class ParameterProvider { public static final int IO_TIME_CPU_RATIO_DEFAULT = 2; public static final int BLOB_UPLOAD_MAX_RETRY_COUNT_DEFAULT = 24; public static final long MAX_MEMORY_LIMIT_IN_BYTES_DEFAULT = -1L; - public static final long MAX_CHUNK_SIZE_IN_BYTES_DEFAULT = 32000000L; - public static final long MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT = 128000000L; + public static final long MAX_CHANNEL_SIZE_IN_BYTES_DEFAULT = 32000000L; + public static final long MAX_CHUNK_SIZE_IN_BYTES_DEFAULT = 128000000L; /* Parameter that enables using internal Parquet buffers for buffering of rows before serializing. 
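/*
 * Illustrative usage sketch (not part of the patch), following the pattern of the tests in
 * this series. It assumes only the ParameterProvider constructor and getters that appear in
 * these patches; the concrete override values are arbitrary.
 */
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import net.snowflake.ingest.utils.ParameterProvider;

class ParameterOverrideSketch {
  static ParameterProvider withOverrides() {
    Map<String, Object> overrides = new HashMap<>();
    // Channel-level flush trigger (the renamed MAX_CHUNK_SIZE_IN_BYTES from earlier patches).
    overrides.put(ParameterProvider.MAX_CHANNEL_SIZE_IN_BYTES, 64000000L);
    // Chunk-level cap consulted when deciding whether to split a blob.
    overrides.put(ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES, 128000000L);
    ParameterProvider provider = new ParameterProvider(overrides, new Properties());
    assert provider.getMaxChannelSizeInBytes() == 64000000L;
    return provider;
  }
}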
It reduces memory consumption compared to using Java Objects for buffering.*/ @@ -144,13 +142,10 @@ private void setParameterMap(Map parameterOverrides, Properties props); this.updateValue( - MAX_CHUNK_SIZE_IN_BYTES, MAX_CHUNK_SIZE_IN_BYTES_DEFAULT, parameterOverrides, props); + MAX_CHANNEL_SIZE_IN_BYTES, MAX_CHANNEL_SIZE_IN_BYTES_DEFAULT, parameterOverrides, props); this.updateValue( - MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM, - MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT, - parameterOverrides, - props); + MAX_CHUNK_SIZE_IN_BYTES, MAX_CHUNK_SIZE_IN_BYTES_DEFAULT, parameterOverrides, props); } /** @return Longest interval in milliseconds between buffer flushes */ @@ -276,18 +271,18 @@ public boolean getEnableParquetInternalBuffering() { return (val instanceof String) ? Boolean.parseBoolean(val.toString()) : (boolean) val; } - /** @return The max chunk size in bytes */ - public long getMaxChunkSizeInBytes() { + /** @return The max channel size in bytes */ + public long getMaxChannelSizeInBytes() { Object val = - this.parameterMap.getOrDefault(MAX_CHUNK_SIZE_IN_BYTES, MAX_CHUNK_SIZE_IN_BYTES_DEFAULT); + this.parameterMap.getOrDefault( + MAX_CHANNEL_SIZE_IN_BYTES, MAX_CHANNEL_SIZE_IN_BYTES_DEFAULT); return (val instanceof String) ? Long.parseLong(val.toString()) : (long) val; } /** @return The max chunk size in bytes that could avoid OOM at server side */ - public long getMaxChunkSizeInBytesToAvoidOom() { + public long getMaxChunkSizeInBytes() { Object val = - this.parameterMap.getOrDefault( - MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM, MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT); + this.parameterMap.getOrDefault(MAX_CHUNK_SIZE_IN_BYTES, MAX_CHUNK_SIZE_IN_BYTES_DEFAULT); return (val instanceof String) ? Long.parseLong(val.toString()) : (long) val; } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java index aa8a3c8b6..38e6ac8ca 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/FlushServiceTest.java @@ -7,7 +7,7 @@ import static net.snowflake.ingest.utils.Constants.BLOB_NO_HEADER; import static net.snowflake.ingest.utils.Constants.BLOB_TAG_SIZE_IN_BYTES; import static net.snowflake.ingest.utils.Constants.BLOB_VERSION_SIZE_IN_BYTES; -import static net.snowflake.ingest.utils.ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT; +import static net.snowflake.ingest.utils.ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_DEFAULT; import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; @@ -537,7 +537,7 @@ public void testBlobSplitDueToChunkSizeLimit() throws Exception { RowSetBuilder builder = RowSetBuilder.newBuilder(); RowSetBuilder.newBuilder().addColumn(colName1, 11).addColumn(colName2, largeData); - for (int idx = 0; idx <= MAX_CHUNK_SIZE_IN_BYTES_TO_AVOID_OOM_DEFAULT / (2 * rowSize); idx++) { + for (int idx = 0; idx <= MAX_CHUNK_SIZE_IN_BYTES_DEFAULT / (2 * rowSize); idx++) { builder.addColumn(colName1, 11).addColumn(colName2, largeData).newRow(); } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java index 1fe034635..dcf4037c6 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/ParameterProviderTest.java @@ -21,7 +21,7 @@ public void withValuesSet() 
{ parameterMap.put(ParameterProvider.IO_TIME_CPU_RATIO, 10); parameterMap.put(ParameterProvider.BLOB_UPLOAD_MAX_RETRY_COUNT, 100); parameterMap.put(ParameterProvider.MAX_MEMORY_LIMIT_IN_BYTES, 1000L); - parameterMap.put(ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES, 1000000L); + parameterMap.put(ParameterProvider.MAX_CHANNEL_SIZE_IN_BYTES, 1000000L); ParameterProvider parameterProvider = new ParameterProvider(parameterMap, prop); Assert.assertEquals(3L, parameterProvider.getBufferFlushIntervalInMs()); @@ -32,7 +32,7 @@ public void withValuesSet() { Assert.assertEquals(10, parameterProvider.getIOTimeCpuRatio()); Assert.assertEquals(100, parameterProvider.getBlobUploadMaxRetryCount()); Assert.assertEquals(1000L, parameterProvider.getMaxMemoryLimitInBytes()); - Assert.assertEquals(1000000L, parameterProvider.getMaxChunkSizeInBytes()); + Assert.assertEquals(1000000L, parameterProvider.getMaxChannelSizeInBytes()); } @Test @@ -120,7 +120,7 @@ public void withDefaultValues() { ParameterProvider.MAX_MEMORY_LIMIT_IN_BYTES_DEFAULT, parameterProvider.getMaxMemoryLimitInBytes()); Assert.assertEquals( - ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_DEFAULT, - parameterProvider.getMaxChunkSizeInBytes()); + ParameterProvider.MAX_CHANNEL_SIZE_IN_BYTES_DEFAULT, + parameterProvider.getMaxChannelSizeInBytes()); } } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java index b6e035c90..b60fbf9f9 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java @@ -1,7 +1,7 @@ package net.snowflake.ingest.streaming.internal; import static java.time.ZoneOffset.UTC; -import static net.snowflake.ingest.utils.ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES_DEFAULT; +import static net.snowflake.ingest.utils.ParameterProvider.MAX_CHANNEL_SIZE_IN_BYTES_DEFAULT; import java.math.BigDecimal; import java.math.BigInteger; @@ -115,7 +115,7 @@ private AbstractRowBuffer createTestBuffer(OpenChannelRequest.OnErrorOption o rs -> {}, initialState, enableParquetMemoryOptimization, - MAX_CHUNK_SIZE_IN_BYTES_DEFAULT); + MAX_CHANNEL_SIZE_IN_BYTES_DEFAULT); } @Test From efec4e9e58dc27cf7fa97fddab0a5bbf7c6de3b8 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Thu, 8 Jun 2023 13:00:25 -0700 Subject: [PATCH 24/29] fix naming --- .../ingest/streaming/internal/AbstractRowBuffer.java | 4 ++-- .../ingest/streaming/internal/ParquetFlusher.java | 12 ++++++++---- .../ingest/streaming/internal/ParquetRowBuffer.java | 10 +++++----- .../org/apache/parquet/hadoop/BdecParquetWriter.java | 12 ++++++------ 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java index c5d0bddac..5fb24e64d 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java @@ -529,7 +529,7 @@ static AbstractRowBuffer createRowBuffer( Consumer rowSizeMetric, ChannelRuntimeState channelRuntimeState, boolean enableParquetMemoryOptimization, - long maxChunkSizeInBytes) { + long maxChannelSizeInBytes) { switch (bdecVersion) { case THREE: //noinspection unchecked @@ -541,7 +541,7 @@ static AbstractRowBuffer createRowBuffer( rowSizeMetric, channelRuntimeState, enableParquetMemoryOptimization, - maxChunkSizeInBytes); + 
maxChannelSizeInBytes); default: throw new SFException( ErrorCode.INTERNAL_ERROR, "Unsupported BDEC format version: " + bdecVersion); diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java index b0c14b12f..295fb9379 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java @@ -25,17 +25,17 @@ public class ParquetFlusher implements Flusher { private static final Logging logger = new Logging(ParquetFlusher.class); private final MessageType schema; private final boolean enableParquetInternalBuffering; - private final long maxChunkSizeInBytes; + private final long maxChannelSizeInBytes; /** * Construct parquet flusher from its schema and set flag that indicates whether Parquet memory * optimization is enabled, i.e. rows will be buffered in internal Parquet buffer. */ public ParquetFlusher( - MessageType schema, boolean enableParquetInternalBuffering, long maxChunkSizeInBytes) { + MessageType schema, boolean enableParquetInternalBuffering, long maxChannelSizeInBytes) { this.schema = schema; this.enableParquetInternalBuffering = enableParquetInternalBuffering; - this.maxChunkSizeInBytes = maxChunkSizeInBytes; + this.maxChannelSizeInBytes = maxChannelSizeInBytes; } @Override @@ -198,7 +198,11 @@ private SerializationResult serializeFromJavaObjects( Map metadata = channelsDataPerTable.get(0).getVectors().metadata; parquetWriter = new BdecParquetWriter( - mergedData, schema, metadata, firstChannelFullyQualifiedTableName, maxChunkSizeInBytes); + mergedData, + schema, + metadata, + firstChannelFullyQualifiedTableName, + maxChannelSizeInBytes); rows.forEach(parquetWriter::writeRow); parquetWriter.close(); diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java index 03b1c1762..e1a8ec08d 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java @@ -52,7 +52,7 @@ public class ParquetRowBuffer extends AbstractRowBuffer { private MessageType schema; private final boolean enableParquetInternalBuffering; - private final long maxChunkSizeInBytes; + private final long maxChannelSizeInBytes; /** Construct a ParquetRowBuffer object. 
*/ ParquetRowBuffer( @@ -62,7 +62,7 @@ public class ParquetRowBuffer extends AbstractRowBuffer { Consumer rowSizeMetric, ChannelRuntimeState channelRuntimeState, boolean enableParquetInternalBuffering, - long maxChunkSizeInBytes) { + long maxChannelSizeInBytes) { super( onErrorOption, defaultTimezone, @@ -75,7 +75,7 @@ public class ParquetRowBuffer extends AbstractRowBuffer { this.tempData = new ArrayList<>(); this.channelName = fullyQualifiedChannelName; this.enableParquetInternalBuffering = enableParquetInternalBuffering; - this.maxChunkSizeInBytes = maxChunkSizeInBytes; + this.maxChannelSizeInBytes = maxChannelSizeInBytes; } @Override @@ -120,7 +120,7 @@ private void createFileWriter() { try { if (enableParquetInternalBuffering) { bdecParquetWriter = - new BdecParquetWriter(fileOutput, schema, metadata, channelName, maxChunkSizeInBytes); + new BdecParquetWriter(fileOutput, schema, metadata, channelName, maxChannelSizeInBytes); } else { this.bdecParquetWriter = null; } @@ -308,7 +308,7 @@ void closeInternal() { @Override public Flusher createFlusher() { - return new ParquetFlusher(schema, enableParquetInternalBuffering, maxChunkSizeInBytes); + return new ParquetFlusher(schema, enableParquetInternalBuffering, maxChannelSizeInBytes); } private static class ParquetColumn { diff --git a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java index b2442d3dc..a7a640e0f 100644 --- a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java +++ b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java @@ -51,9 +51,9 @@ public BdecParquetWriter( MessageType schema, Map extraMetaData, String channelName, - long maxChunkSizeInBytes) + long maxChannelSizeInBytes) throws IOException { - OutputFile file = new ByteArrayOutputFile(stream, maxChunkSizeInBytes); + OutputFile file = new ByteArrayOutputFile(stream, maxChannelSizeInBytes); ParquetProperties encodingProps = createParquetProperties(); Configuration conf = new Configuration(); WriteSupport> writeSupport = @@ -164,11 +164,11 @@ private static ParquetProperties createParquetProperties() { */ private static class ByteArrayOutputFile implements OutputFile { private final ByteArrayOutputStream stream; - private final long maxChunkSizeInBytes; + private final long maxChannelSizeInBytes; - private ByteArrayOutputFile(ByteArrayOutputStream stream, long maxChunkSizeInBytes) { + private ByteArrayOutputFile(ByteArrayOutputStream stream, long maxChannelSizeInBytes) { this.stream = stream; - this.maxChunkSizeInBytes = maxChunkSizeInBytes; + this.maxChannelSizeInBytes = maxChannelSizeInBytes; } @Override @@ -189,7 +189,7 @@ public boolean supportsBlockSize() { @Override public long defaultBlockSize() { - return maxChunkSizeInBytes; + return maxChannelSizeInBytes; } } From 65b75b148134d4a9f20bf438c2d1480741470801 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 12 Jul 2023 21:42:18 +0000 Subject: [PATCH 25/29] fix --- .../ingest/streaming/internal/ParquetFlusher.java | 12 ++++-------- .../org/apache/parquet/hadoop/BdecParquetWriter.java | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java index 295fb9379..b0c14b12f 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java @@ -25,17 +25,17 @@ public 
class ParquetFlusher implements Flusher { private static final Logging logger = new Logging(ParquetFlusher.class); private final MessageType schema; private final boolean enableParquetInternalBuffering; - private final long maxChannelSizeInBytes; + private final long maxChunkSizeInBytes; /** * Construct parquet flusher from its schema and set flag that indicates whether Parquet memory * optimization is enabled, i.e. rows will be buffered in internal Parquet buffer. */ public ParquetFlusher( - MessageType schema, boolean enableParquetInternalBuffering, long maxChannelSizeInBytes) { + MessageType schema, boolean enableParquetInternalBuffering, long maxChunkSizeInBytes) { this.schema = schema; this.enableParquetInternalBuffering = enableParquetInternalBuffering; - this.maxChannelSizeInBytes = maxChannelSizeInBytes; + this.maxChunkSizeInBytes = maxChunkSizeInBytes; } @Override @@ -198,11 +198,7 @@ private SerializationResult serializeFromJavaObjects( Map metadata = channelsDataPerTable.get(0).getVectors().metadata; parquetWriter = new BdecParquetWriter( - mergedData, - schema, - metadata, - firstChannelFullyQualifiedTableName, - maxChannelSizeInBytes); + mergedData, schema, metadata, firstChannelFullyQualifiedTableName, maxChunkSizeInBytes); rows.forEach(parquetWriter::writeRow); parquetWriter.close(); diff --git a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java index a7a640e0f..b21821520 100644 --- a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java +++ b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java @@ -51,9 +51,9 @@ public BdecParquetWriter( MessageType schema, Map extraMetaData, String channelName, - long maxChannelSizeInBytes) + long maxChunkSizeInBytes) throws IOException { - OutputFile file = new ByteArrayOutputFile(stream, maxChannelSizeInBytes); + OutputFile file = new ByteArrayOutputFile(stream, maxChunkSizeInBytes); ParquetProperties encodingProps = createParquetProperties(); Configuration conf = new Configuration(); WriteSupport> writeSupport = From e96688cda4c247936bbcb19e3e1b6e478622e5a7 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 12 Jul 2023 21:43:35 +0000 Subject: [PATCH 26/29] fix --- .../java/org/apache/parquet/hadoop/BdecParquetWriter.java | 8 ++++---- .../ingest/streaming/internal/DataValidationUtilTest.java | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java index b21821520..b2442d3dc 100644 --- a/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java +++ b/src/main/java/org/apache/parquet/hadoop/BdecParquetWriter.java @@ -164,11 +164,11 @@ private static ParquetProperties createParquetProperties() { */ private static class ByteArrayOutputFile implements OutputFile { private final ByteArrayOutputStream stream; - private final long maxChannelSizeInBytes; + private final long maxChunkSizeInBytes; - private ByteArrayOutputFile(ByteArrayOutputStream stream, long maxChannelSizeInBytes) { + private ByteArrayOutputFile(ByteArrayOutputStream stream, long maxChunkSizeInBytes) { this.stream = stream; - this.maxChannelSizeInBytes = maxChannelSizeInBytes; + this.maxChunkSizeInBytes = maxChunkSizeInBytes; } @Override @@ -189,7 +189,7 @@ public boolean supportsBlockSize() { @Override public long defaultBlockSize() { - return maxChannelSizeInBytes; + return maxChunkSizeInBytes; } } diff --git 
a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index f6f9e36af..6cd7a018e 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -1011,8 +1011,8 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index: 0, reason:" - + " Not a valid boolean, see" + + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index: 0," + + " reason: Not a valid boolean, see" + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean" + " for the list of supported formats", () -> validateAndParseBoolean("COL", "abc", 0)); From efa8e4ea26a907ea95ec6a5833f63013387ca5e7 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 20 Sep 2023 22:49:44 +0000 Subject: [PATCH 27/29] add row index --- .../streaming/internal/AbstractRowBuffer.java | 4 +-- .../internal/DataValidationUtil.java | 16 ++++++--- .../streaming/internal/ParquetRowBuffer.java | 4 +-- .../internal/DataValidationUtilTest.java | 36 +++++++++---------- .../streaming/internal/RowBufferTest.java | 2 +- .../SnowflakeStreamingIngestChannelTest.java | 4 +-- 6 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java index fa2c65006..a3ed90c54 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java @@ -260,7 +260,7 @@ Set verifyInputColumns( throw new SFException( ErrorCode.INVALID_FORMAT_ROW, "Extra columns: " + extraCols, - "Columns not present in the table shouldn't be specified."); + "Columns not present in the table shouldn't be specified. Row Index:%d"); } // Check for missing columns in the row @@ -278,7 +278,7 @@ Set verifyInputColumns( throw new SFException( ErrorCode.INVALID_FORMAT_ROW, "Missing columns: " + missingCols, - "Values for all non-nullable columns must be specified."); + "Values for all non-nullable columns must be specified. 
Row Index:%d"); } return inputColNamesMap.keySet(); diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index 1bdfc2095..7982ca6a4 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -457,7 +457,10 @@ static TimestampWrapper validateAndParseTimestamp( if (offsetDateTime.getYear() < 1 || offsetDateTime.getYear() > 9999) { throw new SFException( ErrorCode.INVALID_VALUE_ROW, - "Timestamp out of representable inclusive range of years between 1 and 9999"); + String.format( + "Timestamp out of representable inclusive range of years between 1 and 9999, Row" + + " Index:%d", + insertRowIndex)); } return new TimestampWrapper(offsetDateTime, scale); } @@ -588,7 +591,10 @@ static int validateAndParseDate(String columnName, Object input, long insertRowI if (offsetDateTime.getYear() < -9999 || offsetDateTime.getYear() > 9999) { throw new SFException( ErrorCode.INVALID_VALUE_ROW, - "Date out of representable inclusive range of years between -9999 and 9999"); + String.format( + "Date out of representable inclusive range of years between -9999 and 9999, Row" + + " Index:%d", + insertRowIndex)); } return Math.toIntExact(offsetDateTime.toLocalDate().toEpochDay()); @@ -814,7 +820,7 @@ static void checkValueInRange( throw new SFException( ErrorCode.INVALID_FORMAT_ROW, String.format( - "Number out of representable exclusive range of (-1e%s..1e%s), Row Index:%s", + "Number out of representable exclusive range of (-1e%s..1e%s), Row Index:%d", precision - scale, precision - scale, insertRowIndex)); } } @@ -859,7 +865,7 @@ private static SFException typeNotAllowedException( ErrorCode.INVALID_FORMAT_ROW, String.format( "Object of type %s cannot be ingested into Snowflake column %s of type %s, Row" - + " Index:%s", + + " Index:%d", javaType.getName(), columnName, snowflakeType, insertRowIndex), String.format( String.format("Allowed Java types: %s", String.join(", ", allowedJavaTypes)))); @@ -882,7 +888,7 @@ private static SFException valueFormatNotAllowedException( return new SFException( ErrorCode.INVALID_VALUE_ROW, String.format( - "Value cannot be ingested into Snowflake column %s of type %s, Row Index: %s, reason:" + "Value cannot be ingested into Snowflake column %s of type %s, Row Index:%d, reason:" + " %s", columnName, snowflakeType, rowIndex, reason)); } diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java index 289b8a983..567dbf127 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java @@ -210,8 +210,8 @@ private float addRow( throw new SFException( ErrorCode.MAX_ROW_SIZE_EXCEEDED, String.format( - "rowSizeInBytes=%.3f maxAllowedRowSizeInBytes=%d", - size, clientBufferParameters.getMaxAllowedRowSizeInBytes())); + "rowSizeInBytes=%.3f, maxAllowedRowSizeInBytes=%d, Row Index=%d", + size, clientBufferParameters.getMaxAllowedRowSizeInBytes(), insertRowsCurrIndex)); } out.accept(Arrays.asList(indexedRow)); diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index 6cd7a018e..0694020e7 100644 --- 
a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -676,19 +676,19 @@ public void testTooLargeMultiByteSemiStructuredValues() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type VARIANT, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type VARIANT, Row Index:0, reason:" + " Variant too long: length=18874376 maxLength=16777152", () -> validateAndParseVariant("COL", m, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type ARRAY, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type ARRAY, Row Index:0, reason:" + " Array too large. length=18874378 maxLength=16777152", () -> validateAndParseArray("COL", m, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type OBJECT, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type OBJECT, Row Index:0, reason:" + " Object too large. length=18874376 maxLength=16777152", () -> validateAndParseObject("COL", m, 0)); } @@ -1011,8 +1011,8 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index: 0," - + " reason: Not a valid boolean, see" + + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index:0, reason:" + + " Not a valid boolean, see" + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean" + " for the list of supported formats", () -> validateAndParseBoolean("COL", "abc", 0)); @@ -1027,7 +1027,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIME, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type TIME, Row Index:0, reason:" + " Not a valid time, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1043,7 +1043,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type DATE, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type DATE, Row Index:0, reason:" + " Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1060,7 +1060,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index: 0," + + " cannot be ingested into Snowflake column 
COL of type TIMESTAMP, Row Index:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1077,7 +1077,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index: 0," + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1094,7 +1094,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index: 0," + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1110,7 +1110,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type NUMBER, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type NUMBER, Row Index:0, reason:" + " Not a valid number", () -> validateAndParseBigDecimal("COL", "abc", 0)); @@ -1124,7 +1124,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type REAL, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type REAL, Row Index:0, reason:" + " Not a valid decimal number", () -> validateAndParseReal("COL", "abc", 0)); @@ -1138,7 +1138,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type STRING, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type STRING, Row Index:0, reason:" + " String too long: length=3 characters maxLength=2 characters", () -> validateAndParseString("COL", "abc", Optional.of(2), 0)); @@ -1152,13 +1152,13 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BINARY, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type BINARY, Row Index:0, reason:" + " Binary too long: length=2 maxLength=1", () -> validateAndParseBinary("COL", new byte[] {1, 2}, Optional.of(1), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BINARY, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type BINARY, Row Index:0, reason:" + " Not a valid hex string", () -> 
validateAndParseBinary("COL", "ghi", Optional.empty(), 0)); @@ -1173,7 +1173,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type VARIANT, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type VARIANT, Row Index:0, reason:" + " Not a valid JSON", () -> validateAndParseVariant("COL", "][", 0)); @@ -1188,7 +1188,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type ARRAY, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type ARRAY, Row Index:0, reason:" + " Not a valid JSON", () -> validateAndParseArray("COL", "][", 0)); @@ -1203,7 +1203,7 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type OBJECT, Row Index: 0, reason:" + + " cannot be ingested into Snowflake column COL of type OBJECT, Row Index:0, reason:" + " Not a valid JSON", () -> validateAndParseObject("COL", "}{", 0)); } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java index 63340e25a..45426283d 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java @@ -308,7 +308,7 @@ public void testRowIndexWithMultipleRowsWithError() { .equalsIgnoreCase( "The given row cannot be converted to the internal format due to invalid value:" + " Value cannot be ingested into Snowflake column COLCHAR of type STRING, Row" - + " Index: 1, reason: String too long: length=22 characters maxLength=11" + + " Index:1, reason: String too long: length=22 characters maxLength=11" + " characters")); } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java index 62f2efdce..11be1d601 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java @@ -572,8 +572,8 @@ public void testInsertTooLargeRow() { .collect(Collectors.toList()); String expectedMessage = - "The given row exceeds the maximum allowed row size rowSizeInBytes=67109128.000" - + " maxAllowedRowSizeInBytes=67108864"; + "The given row exceeds the maximum allowed row size rowSizeInBytes=67109128.000," + + " maxAllowedRowSizeInBytes=67108864, Row Index=0"; Map row = new HashMap<>(); schema.forEach(x -> row.put(x.getName(), byteArrayOneMb)); From 22fe14828aa14c62f4a824fa7927db0847bfea40 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 27 Sep 2023 05:04:45 +0000 Subject: [PATCH 28/29] update message --- .../streaming/internal/AbstractRowBuffer.java | 15 +++-- .../internal/DataValidationUtil.java | 21 +++---- .../streaming/internal/ParquetRowBuffer.java | 2 +- .../internal/DataValidationUtilTest.java | 60 +++++++++---------- .../streaming/internal/RowBufferTest.java | 4 +- 
.../SnowflakeStreamingIngestChannelTest.java | 4 +- 6 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java index a3ed90c54..dde639d00 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java @@ -236,10 +236,11 @@ public float getSize() { * * @param row the input row * @param error the insert error that we return to the customer + * @param rowIndex the index of the current row in the input batch * @return the set of input column names */ Set verifyInputColumns( - Map row, InsertValidationResponse.InsertError error) { + Map row, InsertValidationResponse.InsertError error, int rowIndex) { // Map of unquoted column name -> original column name Map inputColNamesMap = row.keySet().stream() @@ -260,7 +261,8 @@ Set verifyInputColumns( throw new SFException( ErrorCode.INVALID_FORMAT_ROW, "Extra columns: " + extraCols, - "Columns not present in the table shouldn't be specified. Row Index:%d"); + String.format( + "Columns not present in the table shouldn't be specified, rowIndex:%d", rowIndex)); } // Check for missing columns in the row @@ -278,7 +280,8 @@ Set verifyInputColumns( throw new SFException( ErrorCode.INVALID_FORMAT_ROW, "Missing columns: " + missingCols, - "Values for all non-nullable columns must be specified. Row Index:%d"); + String.format( + "Values for all non-nullable columns must be specified, rowIndex:%d", rowIndex)); } return inputColNamesMap.keySet(); @@ -304,12 +307,12 @@ public InsertValidationResponse insertRows( this.channelState.updateInsertStats(System.currentTimeMillis(), this.bufferedRowCount); if (onErrorOption == OpenChannelRequest.OnErrorOption.CONTINUE) { // Used to map incoming row(nth row) to InsertError(for nth row) in response - long rowIndex = 0; + int rowIndex = 0; for (Map row : rows) { InsertValidationResponse.InsertError error = new InsertValidationResponse.InsertError(row, rowIndex); try { - Set inputColumnNames = verifyInputColumns(row, error); + Set inputColumnNames = verifyInputColumns(row, error, rowIndex); rowsSizeInBytes += addRow(row, this.bufferedRowCount, this.statsMap, inputColumnNames, rowIndex); this.bufferedRowCount++; @@ -333,7 +336,7 @@ public InsertValidationResponse insertRows( float tempRowsSizeInBytes = 0F; int tempRowCount = 0; for (Map row : rows) { - Set inputColumnNames = verifyInputColumns(row, null); + Set inputColumnNames = verifyInputColumns(row, null, tempRowCount); tempRowsSizeInBytes += addTempRow(row, tempRowCount, this.tempStatsMap, inputColumnNames, tempRowCount); checkBatchSizeEnforcedMaximum(tempRowsSizeInBytes); diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index 7982ca6a4..4514b98b7 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -24,11 +24,7 @@ import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeParseException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.function.Supplier; import 
net.snowflake.client.jdbc.internal.google.common.collect.Sets; import net.snowflake.client.jdbc.internal.snowflake.common.core.SnowflakeDateTimeFormat; @@ -458,8 +454,8 @@ static TimestampWrapper validateAndParseTimestamp( throw new SFException( ErrorCode.INVALID_VALUE_ROW, String.format( - "Timestamp out of representable inclusive range of years between 1 and 9999, Row" - + " Index:%d", + "Timestamp out of representable inclusive range of years between 1 and 9999," + + " rowIndex:%d", insertRowIndex)); } return new TimestampWrapper(offsetDateTime, scale); @@ -592,8 +588,8 @@ static int validateAndParseDate(String columnName, Object input, long insertRowI throw new SFException( ErrorCode.INVALID_VALUE_ROW, String.format( - "Date out of representable inclusive range of years between -9999 and 9999, Row" - + " Index:%d", + "Date out of representable inclusive range of years between -9999 and 9999," + + " rowIndex:%d", insertRowIndex)); } @@ -820,7 +816,7 @@ static void checkValueInRange( throw new SFException( ErrorCode.INVALID_FORMAT_ROW, String.format( - "Number out of representable exclusive range of (-1e%s..1e%s), Row Index:%d", + "Number out of representable exclusive range of (-1e%s..1e%s), rowIndex:%d", precision - scale, precision - scale, insertRowIndex)); } } @@ -864,8 +860,7 @@ private static SFException typeNotAllowedException( return new SFException( ErrorCode.INVALID_FORMAT_ROW, String.format( - "Object of type %s cannot be ingested into Snowflake column %s of type %s, Row" - + " Index:%d", + "Object of type %s cannot be ingested into Snowflake column %s of type %s, rowIndex:%d", javaType.getName(), columnName, snowflakeType, insertRowIndex), String.format( String.format("Allowed Java types: %s", String.join(", ", allowedJavaTypes)))); @@ -888,7 +883,7 @@ private static SFException valueFormatNotAllowedException( return new SFException( ErrorCode.INVALID_VALUE_ROW, String.format( - "Value cannot be ingested into Snowflake column %s of type %s, Row Index:%d, reason:" + "Value cannot be ingested into Snowflake column %s of type %s, rowIndex:%d, reason:" + " %s", columnName, snowflakeType, rowIndex, reason)); } diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java index 567dbf127..75966eb35 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetRowBuffer.java @@ -210,7 +210,7 @@ private float addRow( throw new SFException( ErrorCode.MAX_ROW_SIZE_EXCEEDED, String.format( - "rowSizeInBytes=%.3f, maxAllowedRowSizeInBytes=%d, Row Index=%d", + "rowSizeInBytes:%.3f, maxAllowedRowSizeInBytes:%d, rowIndex:%d", size, clientBufferParameters.getMaxAllowedRowSizeInBytes(), insertRowsCurrIndex)); } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index 0694020e7..86706fcf2 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -676,19 +676,19 @@ public void testTooLargeMultiByteSemiStructuredValues() { expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type VARIANT, Row Index:0, 
reason:" + + " cannot be ingested into Snowflake column COL of type VARIANT, rowIndex:0, reason:" + " Variant too long: length=18874376 maxLength=16777152", () -> validateAndParseVariant("COL", m, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type ARRAY, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type ARRAY, rowIndex:0, reason:" + " Array too large. length=18874378 maxLength=16777152", () -> validateAndParseArray("COL", m, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type OBJECT, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type OBJECT, rowIndex:0, reason:" + " Object too large. length=18874376 maxLength=16777152", () -> validateAndParseObject("COL", m, 0)); } @@ -1005,13 +1005,13 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type BOOLEAN, rowIndex:0. Allowed" + " Java types: boolean, Number, String", () -> validateAndParseBoolean("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BOOLEAN, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type BOOLEAN, rowIndex:0, reason:" + " Not a valid boolean, see" + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean" + " for the list of supported formats", @@ -1021,13 +1021,13 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type TIME, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type TIME, rowIndex:0. Allowed" + " Java types: String, LocalTime, OffsetTime", () -> validateAndParseTime("COL", new Object(), 10, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIME, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type TIME, rowIndex:0, reason:" + " Not a valid time, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1037,13 +1037,13 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type DATE, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type DATE, rowIndex:0. 
Allowed" + " Java types: String, LocalDate, LocalDateTime, ZonedDateTime, OffsetDateTime", () -> validateAndParseDate("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type DATE, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type DATE, rowIndex:0, reason:" + " Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1053,14 +1053,14 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0." + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0." + " Allowed Java types: String, LocalDate, LocalDateTime, ZonedDateTime," + " OffsetDateTime", () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, true, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0," + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1070,14 +1070,14 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0." + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0." + " Allowed Java types: String, LocalDate, LocalDateTime, ZonedDateTime," + " OffsetDateTime", () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, false, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0," + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1087,14 +1087,14 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0." + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0." 
+ " Allowed Java types: String, LocalDate, LocalDateTime, ZonedDateTime," + " OffsetDateTime", () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, false, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type TIMESTAMP, Row Index:0," + + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", @@ -1104,13 +1104,13 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type NUMBER, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type NUMBER, rowIndex:0. Allowed" + " Java types: int, long, byte, short, float, double, BigDecimal, BigInteger, String", () -> validateAndParseBigDecimal("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type NUMBER, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type NUMBER, rowIndex:0, reason:" + " Not a valid number", () -> validateAndParseBigDecimal("COL", "abc", 0)); @@ -1118,13 +1118,13 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type REAL, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type REAL, rowIndex:0. Allowed" + " Java types: Number, String", () -> validateAndParseReal("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type REAL, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type REAL, rowIndex:0, reason:" + " Not a valid decimal number", () -> validateAndParseReal("COL", "abc", 0)); @@ -1132,13 +1132,13 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type STRING, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type STRING, rowIndex:0. 
Allowed" + " Java types: String, Number, boolean, char", () -> validateAndParseString("COL", new Object(), Optional.empty(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type STRING, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type STRING, rowIndex:0, reason:" + " String too long: length=3 characters maxLength=2 characters", () -> validateAndParseString("COL", "abc", Optional.of(2), 0)); @@ -1146,19 +1146,19 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type BINARY, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type BINARY, rowIndex:0. Allowed" + " Java types: byte[], String", () -> validateAndParseBinary("COL", new Object(), Optional.empty(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BINARY, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type BINARY, rowIndex:0, reason:" + " Binary too long: length=2 maxLength=1", () -> validateAndParseBinary("COL", new byte[] {1, 2}, Optional.of(1), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type BINARY, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type BINARY, rowIndex:0, reason:" + " Not a valid hex string", () -> validateAndParseBinary("COL", "ghi", Optional.empty(), 0)); @@ -1166,14 +1166,14 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type VARIANT, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type VARIANT, rowIndex:0. Allowed" + " Java types: String, Primitive data types and their arrays, java.time.*, List," + " Map, T[]", () -> validateAndParseVariant("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type VARIANT, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type VARIANT, rowIndex:0, reason:" + " Not a valid JSON", () -> validateAndParseVariant("COL", "][", 0)); @@ -1181,14 +1181,14 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type ARRAY, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type ARRAY, rowIndex:0. 
Allowed" + " Java types: String, Primitive data types and their arrays, java.time.*, List," + " Map, T[]", () -> validateAndParseArray("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type ARRAY, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type ARRAY, rowIndex:0, reason:" + " Not a valid JSON", () -> validateAndParseArray("COL", "][", 0)); @@ -1196,14 +1196,14 @@ public void testExceptionMessages() { expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" - + " cannot be ingested into Snowflake column COL of type OBJECT, Row Index:0. Allowed" + + " cannot be ingested into Snowflake column COL of type OBJECT, rowIndex:0. Allowed" + " Java types: String, Primitive data types and their arrays, java.time.*, List," + " Map, T[]", () -> validateAndParseObject("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" - + " cannot be ingested into Snowflake column COL of type OBJECT, Row Index:0, reason:" + + " cannot be ingested into Snowflake column COL of type OBJECT, rowIndex:0, reason:" + " Not a valid JSON", () -> validateAndParseObject("COL", "}{", 0)); } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java index 45426283d..8d71d9a44 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java @@ -307,8 +307,8 @@ public void testRowIndexWithMultipleRowsWithError() { .getMessage() .equalsIgnoreCase( "The given row cannot be converted to the internal format due to invalid value:" - + " Value cannot be ingested into Snowflake column COLCHAR of type STRING, Row" - + " Index:1, reason: String too long: length=22 characters maxLength=11" + + " Value cannot be ingested into Snowflake column COLCHAR of type STRING," + + " rowIndex:1, reason: String too long: length=22 characters maxLength=11" + " characters")); } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java index ad09c0a69..8fbf67264 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/SnowflakeStreamingIngestChannelTest.java @@ -588,8 +588,8 @@ public void testInsertTooLargeRow() { .collect(Collectors.toList()); String expectedMessage = - "The given row exceeds the maximum allowed row size rowSizeInBytes=67109128.000," - + " maxAllowedRowSizeInBytes=67108864, Row Index=0"; + "The given row exceeds the maximum allowed row size rowSizeInBytes:67109128.000," + + " maxAllowedRowSizeInBytes:67108864, rowIndex:0"; Map row = new HashMap<>(); schema.forEach(x -> row.put(x.getName(), byteArrayOneMb)); From f0dd91dbcbcfc0bbd6ea64412149a4bcfedabc36 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Wed, 27 Sep 2023 05:55:18 +0000 Subject: [PATCH 29/29] remove star import --- .../ingest/streaming/internal/DataValidationUtil.java | 6 +++++- 1 file changed, 5 insertions(+), 1 
deletion(-)

diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java
index 4514b98b7..a1831f829 100644
--- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java
+++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java
@@ -24,7 +24,11 @@
 import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeParseException;
-import java.util.*;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
 import java.util.function.Supplier;
 import net.snowflake.client.jdbc.internal.google.common.collect.Sets;
 import net.snowflake.client.jdbc.internal.snowflake.common.core.SnowflakeDateTimeFormat;
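
Note: the sketch below is not part of the patch series. It is a minimal illustration of the two size parameters that PATCH 23 splits apart (MAX_CHANNEL_SIZE_IN_BYTES for the per-channel buffer, MAX_CHUNK_SIZE_IN_BYTES for the per-table chunk inside a blob), mirroring the ParameterProviderTest changes above. The class name ParameterOverrideSketch and the 64000000L override are illustrative assumptions only; ParameterProvider, its (Map, Properties) constructor, the parameter constants, and the two getters all appear in the diffs in this series.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Properties;
    import net.snowflake.ingest.utils.ParameterProvider;

    public class ParameterOverrideSketch {
      public static void main(String[] args) {
        // Override the per-channel buffer limit (formerly named MAX_CHUNK_SIZE_IN_BYTES,
        // renamed to MAX_CHANNEL_SIZE_IN_BYTES in this series) and set the server-side
        // chunk limit to its 128 MB default. 64000000L is an arbitrary example value,
        // not a recommended setting.
        Map<String, Object> parameterMap = new HashMap<>();
        parameterMap.put(ParameterProvider.MAX_CHANNEL_SIZE_IN_BYTES, 64000000L);
        parameterMap.put(ParameterProvider.MAX_CHUNK_SIZE_IN_BYTES, 128000000L);

        // Same construction pattern as ParameterProviderTest: overrides map plus Properties.
        ParameterProvider parameterProvider =
            new ParameterProvider(parameterMap, new Properties());

        // getMaxChannelSizeInBytes() is what the channel compares its row buffer size
        // against before requesting a flush; getMaxChunkSizeInBytes() is what
        // FlushService.shouldStopProcessing() uses to cap the per-table chunk in a blob.
        System.out.println(parameterProvider.getMaxChannelSizeInBytes()); // 64000000
        System.out.println(parameterProvider.getMaxChunkSizeInBytes());   // 128000000
      }
    }

Keeping the two limits separate matters because they gate different things: once a channel's row buffer crosses getMaxChannelSizeInBytes(), insertRows in SnowflakeStreamingIngestChannelInternal marks the client as needing a flush, while shouldStopProcessing in FlushService uses getMaxChunkSizeInBytes() to cap how much of one table's data is packed into a single chunk and so avoid server-side OOM.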