Skip to content

Commit

Permalink
Add underscore as separator and reduce blast radius
Browse files (browse the repository at this point in the history)
  • Loading branch information
sfc-gh-psaha committed Sep 17, 2024
1 parent 3f6a5c0 commit a76ef42
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,16 @@ private SerializationResult serializeFromJavaObjects(
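// Using the chunk offset as a suffix ensures that, for interleaved tables, the file id key is
// unique for each chunk: each chunk is logically a separate Parquet file that happens to be
// bundled together. A chunk at offset 0 keeps the plain short file name; any other chunk gets
// "_<offset>" inserted before the file extension.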
String shortName = StreamingIngestUtils.getShortname(filePath);
final String[] parts = shortName.split("\\.");
Preconditions.checkState(parts.length == 2, "Invalid file name format");
metadata.put(
Constants.PRIMARY_FILE_ID_KEY,
String.format("%s%s.%s", parts[0], chunkStartOffset, parts[1]));
if (chunkStartOffset == 0) {
metadata.put(Constants.PRIMARY_FILE_ID_KEY, StreamingIngestUtils.getShortname(filePath));
} else {
String shortName = StreamingIngestUtils.getShortname(filePath);
final String[] parts = shortName.split("\\.");
Preconditions.checkState(parts.length == 2, "Invalid file name format");
metadata.put(
Constants.PRIMARY_FILE_ID_KEY,
String.format("%s_%d.%s", parts[0], chunkStartOffset, parts[1]));
}
parquetWriter =
new BdecParquetWriter(
mergedData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1794,7 +1794,7 @@ public void testParquetFileNameMetadata() throws IOException {

BdecParquetReader reader = new BdecParquetReader(result.chunkData.toByteArray());
Assert.assertEquals(
"testParquetFileNameMetadata13.bdec",
"testParquetFileNameMetadata_13.bdec",
reader.getKeyValueMetadata().get(Constants.PRIMARY_FILE_ID_KEY));
Assert.assertEquals(
RequestBuilder.DEFAULT_VERSION,
Expand Down

0 comments on commit a76ef42

Please sign in to comment.