Skip to content

Commit

Permalink
Add iceberg metadata alter name test (#4645)
Browse files Browse the repository at this point in the history
  • Loading branch information
SterlingT3485 authored Dec 17, 2024
1 parent 881912a commit df1f60b
Show file tree
Hide file tree
Showing 22 changed files with 344 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# README
This Iceberg table was created by using DuckDB (v0.7.0) to generate the TPC-H lineitem table
at SF0.01 and then storing it in a Parquet file.

Then pyspark (3.3.1) was used with the iceberg extension from https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.0.0/iceberg-spark-runtime-3.3_2.12-1.0.0.jar
to write the iceberg table.

Finally, using pyspark, a delete query was performed on this Iceberg table:

```
DELETE FROM iceberg_catalog.lineitem_iceberg where l_extendedprice < 10000
```

The result for Q06 of TPC-H on this table according to pyspark is now:
```
[Row(revenue=Decimal('1077536.9101'))]
```

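Note: it appears that no delete files are present in this Iceberg table; the whole data file was rewritten instead.
This is possibly because the table is so small. Another possible cause (not verified): only `write.update.mode` is set to `merge-on-read` in the table properties, so `DELETE` may still be using the default copy-on-write mode.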
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"format-version" : 2,
"table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b",
"location" : "./lineitem_iceberg",
"last-sequence-number" : 1,
"last-updated-ms" : 1676473674504,
"last-column-id" : 16,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey",
"required" : false,
"type" : "int"
}, {
"id" : 2,
"name" : "l_partkey",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey",
"required" : false,
"type" : "int"
}, {
"id" : 4,
"name" : "l_linenumber",
"required" : false,
"type" : "int"
}, {
"id" : 5,
"name" : "l_quantity",
"required" : false,
"type" : "int"
}, {
"id" : 6,
"name" : "l_extendedprice",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 7,
"name" : "l_discount",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 8,
"name" : "l_tax",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 9,
"name" : "l_returnflag",
"required" : false,
"type" : "string"
}, {
"id" : 10,
"name" : "l_linestatus",
"required" : false,
"type" : "string"
}, {
"id" : 11,
"name" : "l_shipdate",
"required" : false,
"type" : "date"
}, {
"id" : 12,
"name" : "l_commitdate",
"required" : false,
"type" : "date"
}, {
"id" : 13,
"name" : "l_receiptdate",
"required" : false,
"type" : "date"
}, {
"id" : 14,
"name" : "l_shipinstruct",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_shipmode",
"required" : false,
"type" : "string"
}, {
"id" : 16,
"name" : "l_comment",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "root",
"write.update.mode" : "merge-on-read"
},
"current-snapshot-id" : 3776207205136740581,
"refs" : {
"main" : {
"snapshot-id" : 3776207205136740581,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473674504,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"added-records" : "60175",
"added-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "60175",
"total-files-size" : "1390176",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1676473674504,
"snapshot-id" : 3776207205136740581
} ],
"metadata-log" : [ ]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
{
"format-version" : 2,
"table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b",
"location" : "./lineitem_iceberg",
"last-sequence-number" : 2,
"last-updated-ms" : 1676473694730,
"last-column-id" : 16,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey",
"required" : false,
"type" : "int"
}, {
"id" : 2,
"name" : "l_partkey",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey",
"required" : false,
"type" : "int"
}, {
"id" : 4,
"name" : "l_linenumber",
"required" : false,
"type" : "int"
}, {
"id" : 5,
"name" : "l_quantity",
"required" : false,
"type" : "int"
}, {
"id" : 6,
"name" : "l_extendedprice",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 7,
"name" : "l_discount",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 8,
"name" : "l_tax",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 9,
"name" : "l_returnflag",
"required" : false,
"type" : "string"
}, {
"id" : 10,
"name" : "l_linestatus",
"required" : false,
"type" : "string"
}, {
"id" : 11,
"name" : "l_shipdate",
"required" : false,
"type" : "date"
}, {
"id" : 12,
"name" : "l_commitdate",
"required" : false,
"type" : "date"
}, {
"id" : 13,
"name" : "l_receiptdate",
"required" : false,
"type" : "date"
}, {
"id" : 14,
"name" : "l_shipinstruct",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_shipmode",
"required" : false,
"type" : "string"
}, {
"id" : 16,
"name" : "l_comment",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "root",
"write.update.mode" : "merge-on-read"
},
"current-snapshot-id" : 7635660646343998149,
"refs" : {
"main" : {
"snapshot-id" : 7635660646343998149,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473674504,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"added-records" : "60175",
"added-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "60175",
"total-files-size" : "1390176",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro",
"schema-id" : 0
}, {
"sequence-number" : 2,
"snapshot-id" : 7635660646343998149,
"parent-snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473694730,
"summary" : {
"operation" : "overwrite",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"deleted-data-files" : "1",
"added-records" : "51793",
"deleted-records" : "60175",
"added-files-size" : "1208539",
"removed-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "51793",
"total-files-size" : "1208539",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1676473674504,
"snapshot-id" : 3776207205136740581
}, {
"timestamp-ms" : 1676473694730,
"snapshot-id" : 7635660646343998149
} ],
"metadata-log" : [ {
"timestamp-ms" : 1676473674504,
"metadata-file" : "lineitem_iceberg/metadata/rev-1.metadata.json"
} ]
}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2
9 changes: 9 additions & 0 deletions extension/iceberg/test/test_files/iceberg.test
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,12 @@ lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro|1|DATA|AD
3|8|1996-01-29|TAKE BACK RETURN
4|28|1996-04-21|NONE
5|24|1996-03-30|NONE

-LOG metadata_alter_name
-STATEMENT LOAD FROM '${KUZU_ROOT_DIRECTORY}/extension/iceberg/test/iceberg_tables/lineitem_iceberg_alter_name' (file_format='iceberg', allow_moved_paths=true, version_name_format = 'rev-%s.metadata.json%s' ) RETURN l_linenumber, l_quantity, l_shipdate, l_shipinstruct LIMIT 5;
---- 5
1|17|1996-03-13|DELIVER IN PERSON
2|36|1996-04-12|TAKE BACK RETURN
3|8|1996-01-29|TAKE BACK RETURN
4|28|1996-04-21|NONE
5|24|1996-03-30|NONE

0 comments on commit df1f60b

Please sign in to comment.