[SNOW-1882616] Error out for duplicate keys in variant (#929)
* error out for duplicate keys

* add more tests and error log
sfc-gh-ggeng authored Jan 16, 2025
1 parent 5d11892 commit 6c3190c
Showing 2 changed files with 41 additions and 2 deletions.
DataValidationUtil.java
@@ -78,7 +78,8 @@ class DataValidationUtil {

private static final ObjectMapper objectMapper = new ObjectMapper();

-  private static final JsonFactory factory = new JsonFactory();
+  private static final JsonFactory factory =
+      new JsonFactory().configure(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION, true);

// The version of Jackson we are using does not support serialization of date objects from the
// java.time package. Here we define a module with custom java.time serializers. Additionally, we
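
For reference, a minimal standalone sketch (not part of this commit) of what STRICT_DUPLICATE_DETECTION does: a generator created from the configured factory throws as soon as a field name repeats within one object. Jackson's JsonGenerationException is a subclass of IOException, which is why the duplicate surfaces in the IOException catch block changed below.

import java.io.StringWriter;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;

public class DuplicateKeyDemo {
  public static void main(String[] args) throws Exception {
    JsonFactory factory =
        new JsonFactory().configure(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION, true);
    StringWriter out = new StringWriter();
    try (JsonGenerator gen = factory.createGenerator(out)) {
      gen.writeStartObject();
      gen.writeNumberField("key", 1);
      gen.writeNumberField("key", 2); // throws here: strict duplicate detection is on
      gen.writeEndObject();
    } catch (JsonGenerationException e) {
      System.out.println(e.getMessage()); // message contains "Duplicate field 'key'"
    }
  }
}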
@@ -176,7 +177,16 @@ private static String validateAndParseSemiStructured(
       throw valueFormatNotAllowedException(
           columnName, snowflakeType, "Not a valid JSON", insertRowIndex);
     } catch (IOException e) {
-      throw new SFException(e, ErrorCode.IO_ERROR, "Cannot create JSON Parser or JSON generator");
+      if (e.getMessage().contains("Duplicate field")) {
+        throw valueFormatNotAllowedException(
+            columnName, snowflakeType, "Not a valid JSON: duplicate field", insertRowIndex);
+      }
+      throw new SFException(
+          e,
+          ErrorCode.IO_ERROR,
+          String.format(
+              "Cannot create JSON Parser or JSON generator for column %s of type %s, rowIndex:%d",
+              columnName, snowflakeType, insertRowIndex));
     }
     // We return the minified string from the result writer
     return resultWriter.toString();
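
For context, the method appears to build its minified output by streaming tokens into a generator backed by resultWriter, so a duplicate key aborts the copy mid-write. A hedged sketch of that loop (the name minify and the method shape are illustrative, not the actual implementation):

import java.io.IOException;
import java.io.StringWriter;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;

class MinifySketch {
  // Illustrative: copy tokens from a parser into a generator; with
  // STRICT_DUPLICATE_DETECTION enabled on the factory, a repeated field
  // name throws during the copy and propagates as an IOException.
  static String minify(JsonFactory factory, String json) throws IOException {
    StringWriter resultWriter = new StringWriter();
    try (JsonParser parser = factory.createParser(json);
        JsonGenerator generator = factory.createGenerator(resultWriter)) {
      while (parser.nextToken() != null) {
        generator.copyCurrentEvent(parser);
      }
    }
    return resultWriter.toString();
  }
}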
DataValidationUtilTest.java
@@ -856,6 +856,35 @@ public void testValidateAndParseObject() throws Exception {
() -> validateAndParseObjectNew("COL", Collections.singletonMap("foo", new Object()), 0));
}

+  @Test
+  public void testValidateDuplicateKeys() {
+    // simple JSON object with duplicate keys can not be ingested
+    expectError(
+        ErrorCode.INVALID_VALUE_ROW,
+        () -> validateAndParseObjectNew("COL", "{\"key\":1, \"key\":2}", 0));
+    expectError(
+        ErrorCode.INVALID_VALUE_ROW,
+        () -> validateAndParseVariantNew("COL", "{\"key\":1, \"key\":2}", 0));
+
+    // nested JSON object with duplicate keys can not be ingested
+    expectError(
+        ErrorCode.INVALID_VALUE_ROW,
+        () ->
+            validateAndParseObjectNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0));
+    expectError(
+        ErrorCode.INVALID_VALUE_ROW,
+        () ->
+            validateAndParseVariantNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0));
+
+    // array of objects with duplicate keys can not be ingested
+    expectError(
+        ErrorCode.INVALID_VALUE_ROW,
+        () -> validateAndParseArrayNew("COL", "[{\"key\":1, \"key\":2}]", 0));
+    expectError(
+        ErrorCode.INVALID_VALUE_ROW,
+        () -> validateAndParseVariantNew("COL", "[{\"key\":1, \"key\":2}]", 0));
+  }
+
@Test
public void testTooLargeVariant() {
char[] stringContent = new char[16 * 1024 * 1024 - 16]; // {"a":"11","b":""}
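One scoping note, shown with a hedged sketch (not from the commit): Jackson tracks duplicates per JSON object, so the same field name at different nesting levels is still accepted; only a repeat inside a single object is rejected, which is consistent with the nested-object test above.

import java.io.StringWriter;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;

class ScopeDemo {
  public static void main(String[] args) throws Exception {
    JsonFactory factory =
        new JsonFactory().configure(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION, true);
    StringWriter out = new StringWriter();
    try (JsonGenerator gen = factory.createGenerator(out)) {
      gen.writeStartObject();
      gen.writeNumberField("key", 1);
      gen.writeObjectFieldStart("nested"); // child object gets its own duplicate scope
      gen.writeNumberField("key", 2);      // accepted: different scope than the outer "key"
      gen.writeEndObject();
      gen.writeEndObject();
    }
    System.out.println(out); // {"key":1,"nested":{"key":2}}
  }
}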
