[Docs] Update spark-getting-started docs page to make the example valid #11923
base: main
Changes from 4 commits: 4651aab, 3c97949, ea5951c, 98c99f1, a9cbadd
spark-getting-started.md

@@ -77,21 +77,28 @@ Once your table is created, insert data using [`INSERT INTO`](spark-writes.md#insert-into):

 ```sql
 INSERT INTO local.db.table VALUES (1, 'a'), (2, 'b'), (3, 'c');
-INSERT INTO local.db.table SELECT id, data FROM source WHERE length(data) = 1;
 ```

 Iceberg also adds row-level SQL updates to Spark, [`MERGE INTO`](spark-writes.md#merge-into) and [`DELETE FROM`](spark-writes.md#delete-from):

 ```sql
-MERGE INTO local.db.target t USING (SELECT * FROM updates) u ON t.id = u.id
-WHEN MATCHED THEN UPDATE SET t.count = t.count + u.count
Review comment on lines -86 to -87: before this PR, […]
+CREATE TABLE local.db.updates (id bigint, data string) USING iceberg;
+
+INSERT INTO local.db.updates VALUES (1, 'x'), (2, 'y'), (4, 'z');

Review thread:
- Reviewer: same as below — let's update the values so it hits all branches of the MERGE INTO statement. nit: and also add the values as a comment to track the table state.
- Author: about merge branches, commented here: https://github.com/apache/iceberg/pull/11923/files?diff=unified&w=0#r1906122418. The table states are straightforward until after the MERGE query (one insert per table). I have added the table state as a comment after the MERGE only; otherwise there is a lot of duplication. Let me know your thoughts.
- Reviewer: looks good, thanks!
+
+MERGE INTO local.db.table t
+USING (SELECT * FROM local.db.updates) u ON t.id = u.id
+WHEN MATCHED THEN UPDATE SET t.data = u.data
+WHEN NOT MATCHED THEN INSERT *;
+
+-- ((1, 'x'), (2, 'y'), (3, 'c'), (4, 'z'))
+SELECT * FROM local.db.table;
 ```
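As the review thread above notes, the new values exercise every branch of the `MERGE INTO`. A short annotated walk-through, using only the tables and values from the diff (nothing new assumed):

```sql
-- Before the MERGE:
--   local.db.table   = (1, 'a'), (2, 'b'), (3, 'c')
--   local.db.updates = (1, 'x'), (2, 'y'), (4, 'z')
--
-- id 1 and id 2 exist in both tables -> WHEN MATCHED     -> data becomes 'x' and 'y'
-- id 4 exists only in updates        -> WHEN NOT MATCHED -> (4, 'z') is inserted
-- id 3 exists only in the target     -> no clause fires  -> row is left unchanged
--
-- After: (1, 'x'), (2, 'y'), (3, 'c'), (4, 'z')
SELECT * FROM local.db.table ORDER BY id;
```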

 Iceberg supports writing DataFrames using the new [v2 DataFrame write API](spark-writes.md#writing-with-dataframes):

 ```scala
-spark.table("source").select("id", "data")
+spark.table("local.db.updates").select("id", "data")
   .writeTo("local.db.table").append()
 ```
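The intro line in the hunk above also links [`DELETE FROM`](spark-writes.md#delete-from), though the updated example only exercises `MERGE INTO`. A minimal companion sketch — not part of this PR, with an illustrative predicate only — assuming the table state from the `-- ((1, 'x'), ...)` comment:

```sql
-- Delete the row added by the NOT MATCHED branch,
-- leaving (1, 'x'), (2, 'y'), (3, 'c')
DELETE FROM local.db.table WHERE id = 4;

SELECT * FROM local.db.table ORDER BY id;
```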
@@ -160,7 +167,7 @@ This type conversion table describes how Spark types are converted to the Iceberg types:

 | map | map | |

 !!! info
-    The table is based on representing conversion during creating table. In fact, broader supports are applied on write. Here're some points on write:
+    Broader type conversions are applied on write:

     * Iceberg numeric types (`integer`, `long`, `float`, `double`, `decimal`) support promotion during writes. e.g. You can write Spark types `short`, `byte`, `integer`, `long` to Iceberg type `long`.
     * You can write to Iceberg `fixed` type using Spark `binary` type. Note that assertion on the length will be performed.
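A hypothetical illustration of the numeric-promotion bullet above (not from the PR; it assumes the same `local` catalog as the getting-started example): the target column is Iceberg `long`, while the inserted values are narrower Spark types that are promoted on write.

```sql
CREATE TABLE local.db.longs (id bigint) USING iceberg;

-- Spark `integer` and `smallint` values are written into an Iceberg `long` column
INSERT INTO local.db.longs SELECT CAST(1 AS INT);
INSERT INTO local.db.longs SELECT CAST(2 AS SMALLINT);
```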
Second file: the Java integration test that runs the getting-started example.

@@ -40,8 +40,6 @@ public void dropTable() {
   }

   // Run through our Doc's Getting Started Example
-  // TODO Update doc example so that it can actually be run, modifications were required for this
-  // test suite to run
   @Test
   public void testGettingStarted() throws IOException {
     // Creating a table
@@ -66,25 +64,25 @@ public void testGettingStarted() throws IOException {
     sql(
         "CREATE TABLE updates (id bigint, data string) USING parquet LOCATION '%s'",
         temp.newFolder());
-    sql("INSERT INTO updates VALUES (1, 'x'), (2, 'x'), (4, 'z')");
+    sql("INSERT INTO updates VALUES (1, 'x'), (2, 'y'), (4, 'z')");

(kevinjqliu marked a conversation on the CREATE TABLE line as resolved.)

Review thread:
- Reviewer: to make the example more interesting to users, set unique values of `data`.
- Reviewer: I like the original example since it hits all branches of the MERGE INTO statement.
- Author: the example still hits all branches of the merge; the change here is to provide a unique `data` value per row.
     sql(
         "MERGE INTO %s t USING (SELECT * FROM updates) u ON t.id = u.id\n"
             + "WHEN MATCHED THEN UPDATE SET t.data = u.data\n"
             + "WHEN NOT MATCHED THEN INSERT *",
         tableName);
     // Reading
     Assert.assertEquals(
         "Table should now have 5 rows", 5L, scalarSql("SELECT COUNT(*) FROM %s", tableName));
     Assert.assertEquals(
-        "There should be 2 records with data x",
-        2L,
-        scalarSql("SELECT count(1) as count FROM %s WHERE data = 'x' GROUP BY data ", tableName));
+        "Record 1 should now have data x",
+        "x",
+        scalarSql("SELECT data FROM %s WHERE id = 1", tableName));
+    Assert.assertEquals(
+        "Record 2 should now have data y",
+        "y",
+        scalarSql("SELECT data FROM %s WHERE id = 2", tableName));
     // Not supported because of Spark limitation
     if (!catalogName.equals("spark_catalog")) {

Review comment: This statement does not add much to the simple example here; remove it.