From 8ba1e0c1a778b42616970a1c860fa6f048a7e0c5 Mon Sep 17 00:00:00 2001
From: Alec Huang
Date: Fri, 15 Nov 2024 14:58:39 -0800
Subject: [PATCH] SNOW-1808626 Fix struct type statistic for missing fields (#908)

In getStructValue, the branch that appends null to listVal for a struct
field now also increments the current null count in statsMap for every
sub-column that subColumnFinder.getSubColumns returns for that field's
type id.

IcebergStructuredIT gains cases for an empty "not null" object, a null map,
a null object, and an object with an explicitly null field.
assertStructuredDataType now strips null-valued object fields from both the
expected and the actual JSON before comparing them, so that "{}" and
"{a: null}" compare as equal.

---
 .../internal/IcebergParquetValueParser.java   |  3 ++
 .../datatypes/IcebergStructuredIT.java        | 34 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java b/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java
index 7a7ae5eab..b3b7dbf02 100644
--- a/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java
+++ b/src/main/java/net/snowflake/ingest/streaming/internal/IcebergParquetValueParser.java
@@ -468,6 +468,9 @@ private static ParquetBufferValue getStructValue(
           missingFields.add(type.getFieldName(i));
         } else {
           listVal.add(null);
+          subColumnFinder
+              .getSubColumns(type.getType(i).getId())
+              .forEach(subColumn -> statsMap.get(subColumn).incCurrentNullCount());
         }
       }
     }
diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergStructuredIT.java b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergStructuredIT.java
index 2995d1dc9..fb6c746ba 100644
--- a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergStructuredIT.java
+++ b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/IcebergStructuredIT.java
@@ -7,6 +7,7 @@
 import static org.assertj.core.api.Assertions.assertThat;
 
 import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.ArrayList;
@@ -58,6 +59,7 @@ public void testStructuredDataType() throws Exception {
         "object(a int, b string, c boolean)", "{\"a\": 1, \"b\": \"test\", \"c\": true}");
     assertStructuredDataType("map(string, int)", "{\"key1\": 1}");
     assertStructuredDataType("array(int)", "[1, 2, 3]");
+    assertStructuredDataType("object(a int, b string, c boolean) not null", "{}");
     assertStructuredDataType("array(string) not null", "[]");
     assertStructuredDataType("map(string, int) not null", "{}");
     assertMap(
@@ -68,6 +70,9 @@ public void testStructuredDataType() throws Exception {
           }
         });
     assertStructuredDataType("array(string)", null);
+    assertStructuredDataType("map(string, int)", null);
+    assertStructuredDataType("object(a int, b string, c boolean)", null);
+    assertStructuredDataType("object(a int, b string, c boolean)", "{\"a\": null}");
 
     /* Map with null key */
     Assertions.assertThatThrownBy(
@@ -443,6 +448,8 @@ private void assertStructuredDataType(String dataType, String value) throws Exce
     String tmp = res.getString(2);
     JsonNode actualNode = tmp == null ? null : objectMapper.readTree(tmp);
     JsonNode expectedNode = value == null ? null : objectMapper.readTree(value);
+    removeNullFields(actualNode);
+    removeNullFields(expectedNode);
     assertThat(actualNode).isEqualTo(expectedNode);
   }
 
@@ -464,4 +471,31 @@ private void assertMap(String dataType, Map value) throws Exception {
     JsonNode expectedNode = value == null ? null : objectMapper.valueToTree(value);
     assertThat(actualNode).isEqualTo(expectedNode);
   }
+
+  /**
+   * Remove null fields from the JSON node. This is used to compare the JSON node ignoring null
+   * values. e.g. "{}" and "{a: null}".
+   *
+   * @param node JSON node
+   */
+  private void removeNullFields(JsonNode node) {
+    if (node == null) {
+      return;
+    }
+    if (node.isObject()) {
+      List<String> keys = new ArrayList<>();
+      node.fields()
+          .forEachRemaining(
+              entry -> {
+                if (entry.getValue().isNull()) {
+                  keys.add(entry.getKey());
+                } else {
+                  removeNullFields(entry.getValue());
+                }
+              });
+      keys.forEach(((ObjectNode) (node))::remove);
+    } else if (node.isArray()) {
+      node.forEach(this::removeNullFields);
+    }
+  }
 }