From 06914d1d42e2abbdcb55561bdc25f631cb6a5e63 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Fri, 22 Nov 2024 12:24:24 +0000 Subject: [PATCH] Add integration test for metadatacleanup --- ...ekeeperMetadataCleanupIntegrationTest.java | 26 +++++++++++++ .../integration/utils/HiveTestUtils.java | 37 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java index d0e52df9..8e88682e 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java @@ -26,6 +26,7 @@ import static com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter.METRIC_NAME; import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.DELETED; import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.DISABLED; +import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SKIPPED; import static com.expediagroup.beekeeper.integration.CommonTestVariables.AWS_REGION; import static com.expediagroup.beekeeper.integration.CommonTestVariables.DATABASE_NAME_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.LONG_CLEANUP_DELAY_VALUE; @@ -33,6 +34,7 @@ import static com.expediagroup.beekeeper.integration.CommonTestVariables.TABLE_NAME_VALUE; import java.sql.SQLException; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -225,6 +227,30 @@ public void cleanupPartitionedTable() throws Exception { assertThat(amazonS3.doesObjectExist(BUCKET, PARTITIONED_OBJECT_KEY)).isFalse(); } + @Test + public void shouldSkipCleanupForIcebergTable() throws Exception { + // Define custom table props and outputFormat for an Iceberg table + Map tableProperties = new HashMap<>(); + tableProperties.put("table_type", "ICEBERG"); + tableProperties.put("format", "ICEBERG/PARQUET"); + String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; + // Create the Iceberg table in the Hive metastore + hiveTestUtils.createTableWithProperties( + PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true, tableProperties, outputFormat, true); + // Add data to the S3 bucket + amazonS3.putObject(BUCKET, PARTITIONED_TABLE_OBJECT_KEY, TABLE_DATA); + // Insert expired metadata for the Iceberg table + insertExpiredMetadata(PARTITIONED_TABLE_PATH, null); + // wait for cleanup process to run + await() + .atMost(TIMEOUT, TimeUnit.SECONDS) + .until(() -> getExpiredMetadata().get(0).getHousekeepingStatus() == SKIPPED); + // Verify that the table still exists + assertThat(metastoreClient.tableExists(DATABASE_NAME_VALUE, TABLE_NAME_VALUE)).isTrue(); // this is fine, the table is not changed + // Verify that the data in S3 is still present + assertThat(amazonS3.doesObjectExist(BUCKET, PARTITIONED_TABLE_OBJECT_KEY)).isTrue(); // this too is fine, the data is not changed + } + @Test public void cleanupPartitionButNotTable() throws Exception { Table table = hiveTestUtils.createTable(PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true); diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java index 896efe25..8dd53b15 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; @@ -111,4 +112,40 @@ private Partition newTablePartition(Table hiveTable, List values, URI lo partition.getSd().setLocation(location.toString()); return partition; } + + public Table createTableWithProperties(String path, String tableName, boolean partitioned, Map tableProperties, String outputFormat, boolean withBeekeeperProperty) + throws TException { + Table hiveTable = new Table(); + hiveTable.setDbName(DATABASE_NAME_VALUE); + hiveTable.setTableName(tableName); + hiveTable.setTableType(TableType.EXTERNAL_TABLE.name()); + hiveTable.putToParameters("EXTERNAL", "TRUE"); + + // Add custom table props + if (tableProperties != null) { + hiveTable.getParameters().putAll(tableProperties); + } + if (withBeekeeperProperty) { + hiveTable.putToParameters(LifecycleEventType.EXPIRED.getTableParameterName(), "true"); + } + if (partitioned) { + hiveTable.setPartitionKeys(PARTITION_COLUMNS); + } + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(DATA_COLUMNS); + sd.setLocation(path); + sd.setParameters(new HashMap<>()); + // Set the output format for the storage descriptor, defaulting to TextOutputFormat if not specified + if (outputFormat != null) { + sd.setOutputFormat(outputFormat); + } else { + sd.setOutputFormat(TextOutputFormat.class.getName()); + } + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde"); + hiveTable.setSd(sd); + metastoreClient.createTable(hiveTable); + + return hiveTable; + } }