From 1a437d6edf7c2e56a5e2bccfdae92d937641ff39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Bobowski?=
 <145468486+sfc-gh-mbobowski@users.noreply.github.com>
Date: Tue, 3 Dec 2024 16:25:10 +0100
Subject: [PATCH] SNOW-1831140 Complete Iceberg e2e tests (#1015)

---
 test/test_suit/base_iceberg_test.py           | 238 ++++++++++++++++--
 test/test_suit/iceberg_avro_aws.py            |  20 +-
 .../iceberg_schema_evolution_avro_aws.py      |  15 +-
 .../iceberg_schema_evolution_json_aws.py      |   7 +-
 4 files changed, 237 insertions(+), 43 deletions(-)

diff --git a/test/test_suit/base_iceberg_test.py b/test/test_suit/base_iceberg_test.py
index 713c6b4f9..b9ab7dafc 100644
--- a/test/test_suit/base_iceberg_test.py
+++ b/test/test_suit/base_iceberg_test.py
@@ -2,6 +2,7 @@
 from test_suit.assertions import *
 from test_suit.test_utils import RetryableError, NonRetryableError
 import json
+from confluent_kafka import avro
 
 class BaseIcebergTest(BaseE2eTest):
 
@@ -49,33 +50,186 @@ def __init__(self, driver, name_salt: str, config_file_name: str):
             "type":"record",
             "name":"value_schema",
             "fields": [
+            {
+              "name": "id",
+              "type": [
+                "null",
+                "int"
+              ]
+            },
+            {
+              "name": "body_temperature",
+              "type": [
+                "null",
+                "float"
+              ]
+            },
+            {
+              "name": "name",
+              "type": [
+                "null",
+                "string"
+              ]
+            },
+            {
+              "name": "approved_coffee_types",
+              "type": [
+                "null",
                 {
-                  "name": "id",
-                  "type": "int"
-                },
+                  "type": "array",
+                  "items": "string"
+                }
+              ]
+            },
+            {
+              "name": "animals_possessed",
+              "type": [
+                "null",
+                {
+                  "type": "map",
+                  "values": "boolean"
+                }
+              ]
+            }
+          ]
+        }
+        """
+
+        self.test_message_for_schema_evolution_schema = """
+        {
+          "type": "record",
+          "name": "value_schema",
+          "fields": [
+            {
+              "name": "id",
+              "type": [
+                "null",
+                "int"
+              ]
+            },
+            {
+              "name": "body_temperature",
+              "type": [
+                "null",
+                "float"
+              ]
+            },
+            {
+              "name": "name",
+              "type": [
+                "null",
+                "string"
+              ]
+            },
+            {
+              "name": "approved_coffee_types",
+              "type": [
+                "null",
+                {
+                  "type": "array",
+                  "items": "string"
+                }
+              ]
+            },
+            {
+              "name": "animals_possessed",
+              "type": [
+                "null",
                 {
-                  "name": "body_temperature",
-                  "type": "float"
-                },
+                  "type": "map",
+                  "values": "boolean"
+                }
+              ]
+            },
+            {
+              "name": "null_long",
+              "default": null,
+              "type": [
+                "null",
+                "long"
+              ]
+            },
+            {
+              "name": "null_array",
+              "default": null,
+              "type": [
+                "null",
                 {
-                  "name": "name",
-                  "type": "string"
-                },
+                  "type": "array",
+                  "items": "int"
+                }
+              ]
+            },
+            {
+              "name": "null_object",
+              "default": null,
+              "type": [
+                "null",
+                {
+                  "type": "record",
+                  "name": "null_object_record",
+                  "fields": [
+                    {
+                      "name": "key",
+                      "type": "string"
+                    }
+                  ]
+                }
+              ]
+            },
+            {
+              "name": "empty_array",
+              "default": null,
+              "type": [
+                "null",
                 {
-                  "name": "approved_coffee_types",
-                  "type": {
-                    "type": "array",
-                    "items": "string"
-                  }
-                },
+                  "type": "array",
+                  "items": "int"
+                }
+              ]
+            },
+            {
+              "name": "some_object",
+              "default": null,
+              "type": [
+                "null",
                 {
-                  "name": "animals_possessed",
-                  "type": {
-                    "type": "map",
-                    "values": "boolean"
-                  }
+                  "type": "record",
+                  "name": "some_object_record",
+                  "fields": [
+                    {
+                      "name": "null_key",
+                      "type": [
+                        "null",
+                        "string"
+                      ]
+                    },
+                    {
+                      "name": "string_key",
+                      "type": "string"
+                    },
+                    {
+                      "name": "another_string_key",
+                      "type": [
+                        "null",
+                        "string"
+                      ]
+                    },
+                    {
+                      "name": "inner_object",
+                      "type": [
+                        "null",
+                        {
+                          "type": "map",
+                          "values": "int"
+                        }
+                      ]
+                    }
+                  ]
                 }
-            ]
+              ]
+            }
+          ]
         }
         """
 
@@ -128,6 +282,39 @@ def _verify_iceberg_content_from_docs(self, content: dict):
         assert_equals(False, content['animals_possessed']['cats'])
 
 
+    def _verify_iceberg_content_for_schema_evolution_1(self, content: dict):
+        """Check the content dict selected for the first schema-evolution message."""
+        for column in ('id', 'body_temperature', 'name', 'approved_coffee_types', 'animals_possessed'):
+            assert_equals(None, content[column])
+
+        # Evolved columns: the three null_* ones are expected to be None here.
+        for column in ('null_long', 'null_array', 'null_object'):
+            assert_equals(None, content[column])
+        assert_equals([], content['empty_array'])
+
+        some_object = content['some_object']
+        assert_equals(None, some_object['null_key'])
+        assert_equals('string_key', some_object['string_key'])
+
+
+    def _verify_iceberg_content_for_schema_evolution_2(self, content: dict):
+        """Check the content dict selected for the second schema-evolution message."""
+        for column in ('id', 'body_temperature', 'name', 'approved_coffee_types', 'animals_possessed'):
+            assert_equals(None, content[column])
+
+        # Top-level evolved columns are expected to be populated here.
+        assert_equals(2137, content['null_long'])
+        assert_equals([1, 2, 3], content['null_array'])
+        assert_equals('value', content['null_object']['key'])
+        assert_equals([1, 2, 3], content['empty_array'])
+        some_object = content['some_object']
+        assert_equals(None, some_object['null_key'])
+        assert_equals('string_key', some_object['string_key'])
+        assert_equals('another_string_key', some_object['another_string_key'])
+        assert_equals(456, some_object['inner_object']['inner_object_key'])
+
+
+
     def _verify_iceberg_metadata(self, metadata: dict):
         assert_equals(0, metadata['offset'])
         assert_equals(0, metadata['partition'])
@@ -161,3 +348,12 @@ def _select_schematized_record_with_offset(self, offset: int) -> dict:
     def __none_or_json_loads(self, value: str) -> dict:
         return None if value is None else json.loads(value)
 
+
+    def _send_avro_messages(self, message: str, schema: str):
+        """Send 100 copies of `message` to self.topic, using avro.loads(schema) as the value schema."""
+        self.driver.sendAvroSRData(
+            topic=self.topic,
+            value=[message] * 100,
+            value_schema=avro.loads(schema),
+            headers=self.test_headers,
+        )
diff --git a/test/test_suit/iceberg_avro_aws.py b/test/test_suit/iceberg_avro_aws.py
index f6cf70625..2952616af 100644
--- a/test/test_suit/iceberg_avro_aws.py
+++ b/test/test_suit/iceberg_avro_aws.py
@@ -17,27 +17,11 @@ def setup(self):
 
 
     def send(self):
-        value = []
-
-        for e in range(100):
-            value.append(self.test_message_from_docs)
-
-        self.driver.sendAvroSRData(
-            topic=self.topic,
-            value=value,
-            value_schema=avro.loads(self.test_message_from_docs_schema),
-            headers=self.test_headers,
-        )
+        self._send_avro_messages(self.test_message_from_docs, self.test_message_from_docs_schema)
 
 
     def verify(self, round):
-        number_of_records = self.driver.select_number_of_records(self.topic)
-        if number_of_records == 0:
-            raise RetryableError()
-        elif number_of_records != 100:
-            raise NonRetryableError(
-                "Number of record in table is different from number of record sent"
-            )
+        self._assert_number_of_records_in_table(100)
 
         first_record = (
             self.driver.snowflake_conn.cursor()
diff --git a/test/test_suit/iceberg_schema_evolution_avro_aws.py b/test/test_suit/iceberg_schema_evolution_avro_aws.py
index 74a80c27d..13348f9da 100644
--- a/test/test_suit/iceberg_schema_evolution_avro_aws.py
+++ b/test/test_suit/iceberg_schema_evolution_avro_aws.py
@@ -15,8 +15,19 @@ def setup(self):
 
 
     def send(self):
-        pass
+        self._send_avro_messages(self.test_message_from_docs, self.test_message_from_docs_schema)
+        self._send_avro_messages(self.test_message_for_schema_evolution_1, self.test_message_for_schema_evolution_schema)
+        self._send_avro_messages(self.test_message_for_schema_evolution_2, self.test_message_for_schema_evolution_schema)
 
 
     def verify(self, round):
-        pass
+        # send() makes three _send_avro_messages calls; 300 rows are expected in total.
+        self._assert_number_of_records_in_table(300)
+
+        # One sample per send() call (offsets 1, 100, 200), each checked by its own verifier.
+        docs_record = self._select_schematized_record_with_offset(1)
+        self._verify_iceberg_content_from_docs(docs_record)
+        evolution_1_record = self._select_schematized_record_with_offset(100)
+        self._verify_iceberg_content_for_schema_evolution_1(evolution_1_record)
+        evolution_2_record = self._select_schematized_record_with_offset(200)
+        self._verify_iceberg_content_for_schema_evolution_2(evolution_2_record)
diff --git a/test/test_suit/iceberg_schema_evolution_json_aws.py b/test/test_suit/iceberg_schema_evolution_json_aws.py
index 2c99ea23c..82f46e18e 100644
--- a/test/test_suit/iceberg_schema_evolution_json_aws.py
+++ b/test/test_suit/iceberg_schema_evolution_json_aws.py
@@ -26,9 +26,12 @@ def verify(self, round):
         self._assert_number_of_records_in_table(200)
 
         actual_record_from_docs_dict = self._select_schematized_record_with_offset(1)
+        self._verify_iceberg_content_from_docs(actual_record_from_docs_dict)
+
         actual_record_for_schema_evolution_1 = self._select_schematized_record_with_offset(100)
+        self._verify_iceberg_content_for_schema_evolution_1(actual_record_for_schema_evolution_1)
+
         # TODO SNOW-1731264
         # actual_record_for_schema_evolution_2 = self._select_schematized_record_with_offset(200)
 
-        print(actual_record_from_docs_dict)
-        self._verify_iceberg_content_from_docs(actual_record_from_docs_dict)
+