Blob number limit for reg req to avoid oversized registrations #569

Closed
@@ -8,6 +8,7 @@
import static net.snowflake.ingest.utils.Utils.getStackTrace;

import com.codahale.metrics.Timer;
+import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -195,8 +196,9 @@ List<FlushService.BlobData<T>> registerBlobs(Map<String, Timer.Context> latencyT
Timer.Context registerContext =
Utils.createTimerContext(this.owningClient.registerLatency);

+    int blobsPerReq = getBlobNumToRegisterInOneReq(blobs.size());
// Register the blobs, and invalidate any channels that return a failure status code
-    this.owningClient.registerBlobs(blobs);
+    Lists.partition(blobs, blobsPerReq).forEach(owningClient::registerBlobs);

Since this is an IO networking request, would it make sense to parallelize?

@sfc-gh-azagrebin (Contributor Author), Aug 11, 2023:

I would generally be in favour if it were a common case. Here it should be more of an exception indicating that the system is already overloaded (I added a log), hence I tend not to complicate the system with even more concurrency for this. Happy to discuss more.


Makes sense :)

@sfc-gh-azagrebin (Contributor Author):

> I would generally be in favour if it were a common case.

Actually, I take this back: we cannot do this because of the ordering guarantees per channel.

Contributor:

Do we have a test where the blobs request is split?
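
In case it helps the discussion, a minimal sketch of such a test using Mockito and the same Lists.partition call as the change under review (ClientStub is a hypothetical stand-in, not the real RegisterService/client wiring):

import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
import org.junit.Test;

public class BlobRegistrationSplitTest {

  // Hypothetical stand-in for the client's registerBlobs call.
  interface ClientStub {
    void registerBlobs(List<String> blobs);
  }

  @Test
  public void splitsOversizedRegistrationIntoMultipleRequests() {
    ClientStub client = mock(ClientStub.class);
    int blobsPerReq = 10; // MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST_DEFAULT
    List<String> blobs = new ArrayList<>();
    for (int i = 0; i < 25; i++) {
      blobs.add("blob-" + i);
    }

    // Same partitioning call as in the diff above.
    Lists.partition(blobs, blobsPerReq).forEach(client::registerBlobs);

    // 25 blobs with a limit of 10 per request -> 3 requests (10, 10, 5).
    verify(client, times(3)).registerBlobs(anyList());
  }
}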


if (registerContext != null) {
registerContext.stop();
@@ -216,6 +218,21 @@ List<FlushService.BlobData<T>> registerBlobs(Map<String, Timer.Context> latencyT
return errorBlobs;
}

  /**
   * We split blobs into batches to avoid oversized payloads in registration requests, which can
   * happen when too many blobs accumulate, e.g. after connection hiccups.
   */
  private int getBlobNumToRegisterInOneReq(int blobNum) {
    int blobsPerReq = owningClient.getParameterProvider().getMaxBlobsToRegisterInOneRequest();
    if (blobNum > blobsPerReq) {
      logger.logWarn(
          "Many blobs to register: {}, possibly bad connection to Snowflake or ingestion is"
              + " too fast and needs more resources",
          blobNum);
    }
    return blobsPerReq;
  }

Contributor: Could we add the client name to the log as well?
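
If helpful, a sketch of the amended log call; owningClient.getName() is an assumption about the client's accessor and this fragment is not runnable standalone:

    logger.logWarn(
        "Client: {}. Many blobs to register: {}, possibly bad connection to Snowflake or"
            + " ingestion is too fast and needs more resources",
        owningClient.getName(), // hypothetical accessor for the client name
        blobNum);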

/**
* Get the blobsList, this is for TEST ONLY, no lock protection
*
src/main/java/net/snowflake/ingest/utils/ParameterProvider.java (12 additions, 0 deletions)
@@ -31,6 +31,9 @@ public class ParameterProvider {
public static final String MAX_ALLOWED_ROW_SIZE_IN_BYTES =
"MAX_ALLOWED_ROW_SIZE_IN_BYTES".toLowerCase();

public static final String MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST =
"MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST".toLowerCase();

// Default values
public static final long BUFFER_FLUSH_INTERVAL_IN_MILLIS_DEFAULT = 1000;
public static final long BUFFER_FLUSH_CHECK_INTERVAL_IN_MILLIS_DEFAULT = 100;
@@ -51,6 +54,8 @@ public class ParameterProvider {
It reduces memory consumption compared to using Java Objects for buffering.*/
public static final boolean ENABLE_PARQUET_INTERNAL_BUFFERING_DEFAULT = false;

public static final int MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST_DEFAULT = 10;
Contributor: 10 seems too low; should we start with something higher, like 30? If we have the log in Snowhouse for tracking the number of blobs in one request, we could pick the P99 as well.
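
For illustration, a sketch of how a caller might raise the limit, assuming this parameter can be overridden through the client Properties like the other parameters in this provider (the lowercase property key and the override path are assumptions):

import java.util.Properties;
import net.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import net.snowflake.ingest.streaming.SnowflakeStreamingIngestClientFactory;

public class ClientWithHigherBlobLimit {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    // Connection properties (url, user, private_key, role, ...) omitted here.
    // Override the default of 10 registration blobs per request with 30.
    props.put("max_blobs_to_register_in_one_request", "30");
    try (SnowflakeStreamingIngestClient client =
        SnowflakeStreamingIngestClientFactory.builder("MY_CLIENT")
            .setProperties(props)
            .build()) {
      // ... open channels and ingest rows ...
    }
  }
}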


/** Map of parameter name to parameter value. This will be set by client/configure API Call. */
private final Map<String, Object> parameterMap = new HashMap<>();

@@ -296,6 +301,13 @@ public long getMaxAllowedRowSizeInBytes() {
return (val instanceof String) ? Long.parseLong(val.toString()) : (long) val;
}

  public int getMaxBlobsToRegisterInOneRequest() {
    Object val =
        this.parameterMap.getOrDefault(
            MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST, MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST_DEFAULT);
    return (val instanceof String) ? Integer.parseInt(val.toString()) : (int) val;
  }

Contributor: Could we add a unit test for this, like for all the other parameters we added?
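
A minimal sketch of such a test, assuming ParameterProvider can be constructed from a parameter-override map plus connection Properties, as in the other parameter tests (that constructor signature is an assumption; adjust to the real one):

import static org.junit.Assert.assertEquals;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import net.snowflake.ingest.utils.ParameterProvider;
import org.junit.Test;

public class MaxBlobsToRegisterParameterTest {

  @Test
  public void overrideIsPickedUp() {
    Map<String, Object> overrides = new HashMap<>();
    overrides.put(ParameterProvider.MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST, 30);
    // Hypothetical constructor (overrides, props).
    ParameterProvider provider = new ParameterProvider(overrides, new Properties());
    assertEquals(30, provider.getMaxBlobsToRegisterInOneRequest());
  }

  @Test
  public void defaultAppliesWhenUnset() {
    ParameterProvider provider = new ParameterProvider(new HashMap<>(), new Properties());
    assertEquals(
        ParameterProvider.MAX_BLOBS_TO_REGISTER_IN_ONE_REQUEST_DEFAULT,
        provider.getMaxBlobsToRegisterInOneRequest());
  }
}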

@Override
public String toString() {
return "ParameterProvider{" + "parameterMap=" + parameterMap + '}';