From a76ef42ad9d4f88ff9d1eadafe32f75aab44fa68 Mon Sep 17 00:00:00 2001 From: Purujit Saha Date: Tue, 17 Sep 2024 17:31:38 +0000 Subject: [PATCH] Add underscore as separator and reduce blast radius --- .../streaming/internal/ParquetFlusher.java | 16 ++++++++++------ .../ingest/streaming/internal/RowBufferTest.java | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java index 46064ef99..0d38e36cd 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/ParquetFlusher.java @@ -124,12 +124,16 @@ private SerializationResult serializeFromJavaObjects( // Using chunk offset as suffix ensures that for interleaved tables, the file // id key is unique for each chunk. Each chunk is logically a separate Parquet file that happens // to be bundled together. - String shortName = StreamingIngestUtils.getShortname(filePath); - final String[] parts = shortName.split("\\."); - Preconditions.checkState(parts.length == 2, "Invalid file name format"); - metadata.put( - Constants.PRIMARY_FILE_ID_KEY, - String.format("%s%s.%s", parts[0], chunkStartOffset, parts[1])); + if (chunkStartOffset == 0) { + metadata.put(Constants.PRIMARY_FILE_ID_KEY, StreamingIngestUtils.getShortname(filePath)); + } else { + String shortName = StreamingIngestUtils.getShortname(filePath); + final String[] parts = shortName.split("\\."); + Preconditions.checkState(parts.length == 2, "Invalid file name format"); + metadata.put( + Constants.PRIMARY_FILE_ID_KEY, + String.format("%s_%d.%s", parts[0], chunkStartOffset, parts[1])); + } parquetWriter = new BdecParquetWriter( mergedData, diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java index 2f9fee2cf..a567b3433 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/RowBufferTest.java @@ -1794,7 +1794,7 @@ public void testParquetFileNameMetadata() throws IOException { BdecParquetReader reader = new BdecParquetReader(result.chunkData.toByteArray()); Assert.assertEquals( - "testParquetFileNameMetadata13.bdec", + "testParquetFileNameMetadata_13.bdec", reader.getKeyValueMetadata().get(Constants.PRIMARY_FILE_ID_KEY)); Assert.assertEquals( RequestBuilder.DEFAULT_VERSION,