diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java index b7b96050..801dd0ef 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java @@ -45,17 +45,15 @@ public class S3PathCleaner implements PathCleaner { private IcebergValidator icebergValidator; public S3PathCleaner(S3Client s3Client, SentinelFilesCleaner sentinelFilesCleaner, - BytesDeletedReporter bytesDeletedReporter, IcebergValidator icebergValidator) { + BytesDeletedReporter bytesDeletedReporter) { this.s3Client = s3Client; this.sentinelFilesCleaner = sentinelFilesCleaner; this.bytesDeletedReporter = bytesDeletedReporter; - this.icebergValidator = icebergValidator; } @Override @TimedTaggable("s3-paths-deleted") public void cleanupPath(HousekeepingEntity housekeepingEntity) { - icebergValidator.throwExceptionIfIceberg(housekeepingEntity.getDatabaseName(), housekeepingEntity.getTableName()); S3SchemeURI s3SchemeURI = new S3SchemeURI(housekeepingEntity.getPath()); String key = s3SchemeURI.getKey(); String bucket = s3SchemeURI.getBucket(); diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java index 2ce72451..c9d3d318 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java @@ -38,7 +38,6 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -58,8 +57,8 @@ class S3DryRunPathCleanerTest { private HousekeepingPath housekeepingPath; private AmazonS3 amazonS3; - @Mock private BytesDeletedReporter bytesDeletedReporter; - @Mock private IcebergValidator icebergValidator; + private @Mock BytesDeletedReporter bytesDeletedReporter; + private boolean dryRunEnabled = true; @@ -83,7 +82,7 @@ void setUp() { .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(bucket, object.getKey())); S3Client s3Client = new S3Client(amazonS3, dryRunEnabled); - s3DryRunPathCleaner = new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); + s3DryRunPathCleaner = new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); housekeepingPath = HousekeepingPath .builder() .path(absolutePath) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java index 6eddc429..102c424e 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java @@ -33,13 +33,13 @@ import java.util.List; import org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider; +import org.junit.Rule; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.testcontainers.containers.localstack.LocalStackContainer; -import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; import org.testcontainers.utility.DockerImageName; @@ -55,10 +55,8 @@ import com.amazonaws.services.s3.model.S3ObjectSummary; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.config.FileSystemType; import com.expediagroup.beekeeper.core.error.BeekeeperException; -import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -80,16 +78,19 @@ class S3PathCleanerTest { private S3Client s3Client; private S3SentinelFilesCleaner s3SentinelFilesCleaner; private @Mock BytesDeletedReporter bytesDeletedReporter; - private @Mock IcebergValidator icebergValidator; + private S3PathCleaner s3PathCleaner; - @Container + @Rule public static LocalStackContainer awsContainer = new LocalStackContainer( DockerImageName.parse("localstack/localstack:0.14.2")).withServices(S3); + static { + awsContainer.start(); + } + public static String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); @BeforeEach void setUp() { - String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); amazonS3 = AmazonS3ClientBuilder .standard() .withCredentials(new BasicAWSCredentialsProvider("accesskey", "secretkey")) @@ -103,7 +104,7 @@ void setUp() { boolean dryRunEnabled = false; s3Client = new S3Client(amazonS3, dryRunEnabled); s3SentinelFilesCleaner = new S3SentinelFilesCleaner(s3Client); - s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter); String tableName = "table"; String databaseName = "database"; housekeepingPath = HousekeepingPath @@ -256,7 +257,7 @@ void sentinelFilesCleanerThrowsException() { amazonS3.putObject(bucket, key1, content); - s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter); assertThatCode(() -> s3PathCleaner.cleanupPath(housekeepingPath)).doesNotThrowAnyException(); assertThat(amazonS3.doesObjectExist(bucket, key1)).isFalse(); } @@ -321,7 +322,7 @@ void sentinelFilesForParentsAndPathWithTrailingSlash() { @Test void noBytesDeletedMetricWhenFileDeletionFails() { S3Client mockS3Client = mock(S3Client.class); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); when(mockS3Client.doesObjectExist(bucket, key1)).thenReturn(true); ObjectMetadata objectMetadata = new ObjectMetadata(); objectMetadata.setContentLength(10); @@ -337,7 +338,7 @@ void noBytesDeletedMetricWhenFileDeletionFails() { @Test void noBytesDeletedMetricWhenDirectoryDeletionFails() { S3Client mockS3Client = mock(S3Client.class); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); doThrow(AmazonServiceException.class).when(mockS3Client).listObjects(bucket, keyRootAsDirectory); assertThatExceptionOfType(AmazonServiceException.class) @@ -350,7 +351,7 @@ void reportBytesDeletedWhenDirectoryDeletionPartiallyFails() { AmazonS3 mockAmazonS3 = mock(AmazonS3.class); S3Client mockS3Client = new S3Client(mockAmazonS3, false); mockOneOutOfTwoObjectsDeleted(mockAmazonS3); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); assertThatExceptionOfType(BeekeeperException.class) .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)) .withMessage(format("Not all files could be deleted at path \"%s/%s\"; deleted 1/2 objects. " @@ -367,37 +368,6 @@ void extractingURIFails() { .withMessage(format("'%s' is not an S3 path.", path)); } - @Test - void shouldThrowBeekeeperIcebergExceptionWhenIcebergTableDetected() { - doThrow(new BeekeeperIcebergException("Iceberg tables are not supported")) - .when(icebergValidator) - .throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); - - assertThatExceptionOfType(BeekeeperIcebergException.class) - .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)) - .withMessage("Iceberg tables are not supported"); - - verify(icebergValidator).throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); - verifyNoInteractions(bytesDeletedReporter); - } - - @Test - void shouldProceedWithDeletionWhenNotIcebergTable() { - amazonS3.putObject(bucket, key1, content); - amazonS3.putObject(bucket, key2, content); - - housekeepingPath.setPath("s3://" + bucket + "/" + keyRoot); - - assertThatCode(() -> s3PathCleaner.cleanupPath(housekeepingPath)) - .doesNotThrowAnyException(); - - assertThat(amazonS3.doesObjectExist(bucket, key1)).isFalse(); - assertThat(amazonS3.doesObjectExist(bucket, key2)).isFalse(); - - long expectedBytesDeleted = content.getBytes().length * 2L; // 11 bytes('some content') * 2 = 22 bytes - verify(bytesDeletedReporter).reportTaggable(expectedBytesDeleted, housekeepingPath, FileSystemType.S3); - } - private void mockOneOutOfTwoObjectsDeleted(AmazonS3 mockAmazonS3) { S3ObjectSummary s3ObjectSummary = new S3ObjectSummary(); s3ObjectSummary.setBucketName(bucket); @@ -415,4 +385,4 @@ private void mockOneOutOfTwoObjectsDeleted(AmazonS3 mockAmazonS3) { when(mockAmazonS3.deleteObjects(any(DeleteObjectsRequest.class))) .thenReturn(new DeleteObjectsResult(List.of(deletedObject))); } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java index 9d7e5272..d00e4799 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java @@ -15,9 +15,6 @@ */ package com.expediagroup.beekeeper.integration; -import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; -import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; @@ -28,12 +25,10 @@ import java.sql.SQLException; import java.util.List; -import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.awaitility.Duration; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -41,7 +36,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.extension.RegisterExtension; import org.mockito.junit.jupiter.MockitoExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; @@ -53,16 +47,12 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CreateBucketRequest; -import com.google.common.collect.ImmutableMap; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; -import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.integration.utils.TestAppender; import com.expediagroup.beekeeper.path.cleanup.BeekeeperPathCleanup; -import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; - @Testcontainers @ExtendWith(MockitoExtension.class) public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrationTestBase { @@ -72,12 +62,6 @@ public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrat private static final String SCHEDULER_DELAY_MS_PROPERTY = "properties.scheduler-delay-ms"; private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String AWS_S3_ENDPOINT_PROPERTY = "aws.s3.endpoint"; - private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; - private static final String AWS_DISABLE_GET_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disableGetObjectMD5Validation"; - private static final String AWS_DISABLE_PUT_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disablePutObjectMD5Validation"; - - private static final String S3_ACCESS_KEY = "access"; - private static final String S3_SECRET_KEY = "secret"; private static final String BUCKET = "test-path-bucket"; private static final String DB_AND_TABLE_PREFIX = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE; @@ -99,37 +83,17 @@ public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrat @Container private static final LocalStackContainer S3_CONTAINER = ContainerTestUtils.awsContainer(S3); - static { - S3_CONTAINER.start(); - } private static AmazonS3 amazonS3; - private static final String S3_ENDPOINT = ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3); - private final ExecutorService executorService = Executors.newFixedThreadPool(1); private final TestAppender appender = new TestAppender(); - private static Map metastoreProperties = ImmutableMap - .builder() - .put(ENDPOINT, S3_ENDPOINT) - .put(ACCESS_KEY, S3_ACCESS_KEY) - .put(SECRET_KEY, S3_SECRET_KEY) - .build(); - - @RegisterExtension - public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( - DATABASE_NAME_VALUE, metastoreProperties); - private HiveTestUtils hiveTestUtils; - private HiveMetaStoreClient metastoreClient; - @BeforeAll public static void init() { System.setProperty(SPRING_PROFILES_ACTIVE_PROPERTY, SPRING_PROFILES_ACTIVE); System.setProperty(SCHEDULER_DELAY_MS_PROPERTY, SCHEDULER_DELAY_MS); System.setProperty(DRY_RUN_ENABLED_PROPERTY, DRY_RUN_ENABLED); - System.setProperty(AWS_S3_ENDPOINT_PROPERTY, S3_ENDPOINT); - System.setProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY, "true"); - System.setProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY, "true"); + System.setProperty(AWS_S3_ENDPOINT_PROPERTY, ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3)); amazonS3 = ContainerTestUtils.s3Client(S3_CONTAINER, AWS_REGION); amazonS3.createBucket(new CreateBucketRequest(BUCKET, AWS_REGION)); @@ -141,18 +105,12 @@ public static void teardown() { System.clearProperty(SCHEDULER_DELAY_MS_PROPERTY); System.clearProperty(DRY_RUN_ENABLED_PROPERTY); System.clearProperty(AWS_S3_ENDPOINT_PROPERTY); - System.clearProperty(METASTORE_URI_PROPERTY); amazonS3.shutdown(); - S3_CONTAINER.stop(); } @BeforeEach public void setup() { - System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); - metastoreClient = thriftHiveMetaStore.client(); - hiveTestUtils = new HiveTestUtils(metastoreClient); - amazonS3.listObjectsV2(BUCKET) .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(BUCKET, object.getKey())); @@ -289,4 +247,4 @@ private void assertS3ClientLogs(int expected) { } assertThat(logsFromS3Client).isEqualTo(expected); } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java index dd1be3b7..257760e4 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java @@ -15,41 +15,32 @@ */ package com.expediagroup.beekeeper.integration; -import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; -import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; import static com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter.METRIC_NAME; import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.DELETED; -import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SKIPPED; import static com.expediagroup.beekeeper.integration.CommonTestVariables.AWS_REGION; import static com.expediagroup.beekeeper.integration.CommonTestVariables.DATABASE_NAME_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.TABLE_NAME_VALUE; import java.sql.SQLException; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.thrift.TException; import org.awaitility.Duration; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -60,17 +51,10 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CreateBucketRequest; -import com.google.common.collect.ImmutableMap; -import com.expediagroup.beekeeper.core.model.HousekeepingEntity; -import com.expediagroup.beekeeper.core.model.HousekeepingPath; -import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; -import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.path.cleanup.BeekeeperPathCleanup; -import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; - @Testcontainers public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTestBase { @@ -79,12 +63,6 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes private static final String SCHEDULER_DELAY_MS_PROPERTY = "properties.scheduler-delay-ms"; private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String AWS_S3_ENDPOINT_PROPERTY = "aws.s3.endpoint"; - private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; - private static final String AWS_DISABLE_GET_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disableGetObjectMD5Validation"; - private static final String AWS_DISABLE_PUT_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disablePutObjectMD5Validation"; - - private static final String S3_ACCESS_KEY = "access"; - private static final String S3_SECRET_KEY = "secret"; private static final String BUCKET = "test-path-bucket"; private static final String DB_AND_TABLE_PREFIX = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE; @@ -93,7 +71,6 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes private static final String OBJECT_KEY2 = DB_AND_TABLE_PREFIX + "/id1/partition1/file2"; private static final String OBJECT_KEY_SENTINEL = DB_AND_TABLE_PREFIX + "/id1/partition1_$folder$"; private static final String ABSOLUTE_PATH = "s3://" + BUCKET + "/" + OBJECT_KEY_ROOT; - private static final String TABLE_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; private static final String OBJECT_KEY_OTHER = DB_AND_TABLE_PREFIX + "/id1/partition10/file1"; private static final String OBJECT_KEY_OTHER_SENTINEL = DB_AND_TABLE_PREFIX + "/id1/partition10_$folder$"; @@ -107,35 +84,16 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes @Container private static final LocalStackContainer S3_CONTAINER = ContainerTestUtils.awsContainer(S3); - static { - S3_CONTAINER.start(); - } private static AmazonS3 amazonS3; - private static final String S3_ENDPOINT = ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3); - private final ExecutorService executorService = Executors.newFixedThreadPool(1); - - private static Map metastoreProperties = ImmutableMap - .builder() - .put(ENDPOINT, S3_ENDPOINT) - .put(ACCESS_KEY, S3_ACCESS_KEY) - .put(SECRET_KEY, S3_SECRET_KEY) - .build(); - @RegisterExtension - public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( - DATABASE_NAME_VALUE, metastoreProperties); - - private HiveTestUtils hiveTestUtils; - private HiveMetaStoreClient metastoreClient; + private final ExecutorService executorService = Executors.newFixedThreadPool(1); @BeforeAll public static void init() { System.setProperty(SPRING_PROFILES_ACTIVE_PROPERTY, SPRING_PROFILES_ACTIVE); System.setProperty(SCHEDULER_DELAY_MS_PROPERTY, SCHEDULER_DELAY_MS); System.setProperty(DRY_RUN_ENABLED_PROPERTY, DRY_RUN_ENABLED); - System.setProperty(AWS_S3_ENDPOINT_PROPERTY, S3_ENDPOINT); - System.setProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY, "true"); - System.setProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY, "true"); + System.setProperty(AWS_S3_ENDPOINT_PROPERTY, ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3)); amazonS3 = ContainerTestUtils.s3Client(S3_CONTAINER, AWS_REGION); amazonS3.createBucket(new CreateBucketRequest(BUCKET, AWS_REGION)); @@ -147,20 +105,12 @@ public static void teardown() { System.clearProperty(SCHEDULER_DELAY_MS_PROPERTY); System.clearProperty(DRY_RUN_ENABLED_PROPERTY); System.clearProperty(AWS_S3_ENDPOINT_PROPERTY); - System.clearProperty(METASTORE_URI_PROPERTY); - System.clearProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY); - System.clearProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY); amazonS3.shutdown(); - S3_CONTAINER.stop(); } @BeforeEach public void setup() { - System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); - metastoreClient = thriftHiveMetaStore.client(); - hiveTestUtils = new HiveTestUtils(metastoreClient); - amazonS3.listObjectsV2(BUCKET) .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(BUCKET, object.getKey())); @@ -176,8 +126,7 @@ public void stop() throws InterruptedException { } @Test - public void cleanupPathsForFile() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForFile() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); @@ -194,8 +143,7 @@ public void cleanupPathsForFile() throws SQLException, TException { } @Test - public void cleanupPathsForDirectory() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForDirectory() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -214,8 +162,7 @@ public void cleanupPathsForDirectory() throws SQLException, TException { } @Test - public void cleanupPathsForDirectoryWithSpace() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForDirectoryWithSpace() throws SQLException { String objectKeyRoot = DB_AND_TABLE_PREFIX + "/ /id1/partition1"; String objectKey1 = objectKeyRoot + "/file1"; String objectKey2 = objectKeyRoot + "/file2"; @@ -235,8 +182,7 @@ public void cleanupPathsForDirectoryWithSpace() throws SQLException, TException } @Test - public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -253,8 +199,7 @@ public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException, TEx } @Test - public void cleanupSentinelForParent() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupSentinelForParent() throws SQLException { String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; String databaseSentinel = "database_$folder$"; @@ -278,8 +223,7 @@ public void cleanupSentinelForParent() throws SQLException, TException { } @Test - public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupSentinelForNonEmptyParent() throws SQLException { String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); @@ -301,32 +245,7 @@ public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { } @Test - public void shouldSkipCleanupForIcebergTable() throws Exception { - Map tableProperties = new HashMap<>(); - tableProperties.put("table_type", "ICEBERG"); - tableProperties.put("format", "ICEBERG/PARQUET"); - String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; - - hiveTestUtils.createTableWithProperties( - TABLE_PATH, TABLE_NAME_VALUE, false, tableProperties, outputFormat, true); - - String objectKey = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/file1"; - String path = "s3://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; - - amazonS3.putObject(BUCKET, objectKey, CONTENT); - insertUnreferencedPath(path); - - await().atMost(TIMEOUT, TimeUnit.SECONDS) - .until(() -> getUnreferencedPaths().get(0).getHousekeepingStatus() == SKIPPED); - - assertThat(amazonS3.doesObjectExist(BUCKET, objectKey)) - .withFailMessage("S3 object %s should still exist as cleanup was skipped.", objectKey) - .isTrue(); - } - - @Test - public void metrics() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void metrics() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); @@ -364,4 +283,4 @@ public void prometheus() { await().atMost(TIMEOUT, TimeUnit.SECONDS) .until(() -> client.execute(request).getStatusLine().getStatusCode() == 200); } -} +} \ No newline at end of file diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java index 0dd2fc94..b5ddbb2a 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java @@ -145,7 +145,7 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled PathCleaner pathCleaner( S3Client s3Client, BytesDeletedReporter bytesDeletedReporter, IcebergValidator icebergValidator) { - return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); + return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); } @Bean(name = "expiredMetadataHandler") diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index a94bfaca..5d324fb5 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -10,12 +10,6 @@ beekeeper-path-cleanup - - 2.8.1 - 2.3.7 - UTF-8 - - com.amazonaws @@ -74,21 +68,6 @@ 27.1-jre - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.version} - - - org.slf4j - slf4j-log4j12 - - - javax.servlet - servlet-api - - - diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java index eff5fac8..b8715a2f 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java @@ -16,9 +16,7 @@ package com.expediagroup.beekeeper.path.cleanup.context; import java.util.List; -import java.util.function.Supplier; -import org.apache.hadoop.hive.conf.HiveConf; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.domain.EntityScan; import org.springframework.context.annotation.Bean; @@ -37,23 +35,16 @@ import com.expediagroup.beekeeper.cleanup.aws.S3Client; import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner; import com.expediagroup.beekeeper.cleanup.aws.S3SentinelFilesCleaner; -import com.expediagroup.beekeeper.cleanup.hive.HiveClientFactory; -import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; import com.expediagroup.beekeeper.path.cleanup.handler.GenericPathHandler; import com.expediagroup.beekeeper.path.cleanup.service.PagingPathCleanupService; import com.expediagroup.beekeeper.path.cleanup.service.PathRepositoryCleanupService; -import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; -import com.hotels.hcommon.hive.metastore.client.closeable.CloseableMetaStoreClientFactory; -import com.hotels.hcommon.hive.metastore.client.supplier.HiveMetaStoreClientSupplier; - @Configuration @EnableScheduling @ComponentScan({ "com.expediagroup.beekeeper.core", "com.expediagroup.beekeeper.cleanup" }) @@ -94,9 +85,8 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled @Bean(name = "s3PathCleaner") PathCleaner pathCleaner( S3Client s3Client, - BytesDeletedReporter bytesDeletedReporter, - IcebergValidator icebergValidator) { - return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); + BytesDeletedReporter bytesDeletedReporter) { + return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); } @Bean @@ -118,35 +108,4 @@ RepositoryCleanupService repositoryCleanupService( DisableTablesService disableTablesService() { return () -> {}; } - - @Bean - public HiveConf hiveConf(@Value("${properties.metastore-uri}") String metastoreUri) { - HiveConf conf = new HiveConf(); - conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); - return conf; - } - - @Bean - public CloseableMetaStoreClientFactory metaStoreClientFactory() { - return new CloseableMetaStoreClientFactory(); - } - - @Bean - Supplier metaStoreClientSupplier( - CloseableMetaStoreClientFactory metaStoreClientFactory, HiveConf hiveConf) { - String name = "beekeeper-scheduler-apiary"; - return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); - } - - @Bean(name = "hiveClientFactory") - public CleanerClientFactory clientFactory( - Supplier metaStoreClientSupplier, - @Value("${properties.dry-run-enabled}") boolean dryRunEnabled) { - return new HiveClientFactory(metaStoreClientSupplier, dryRunEnabled); - } - - @Bean - public IcebergValidator icebergValidator(CleanerClientFactory clientFactory) { - return new IcebergValidator(clientFactory); - } -} +} \ No newline at end of file diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java index efe70173..85ed3182 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java @@ -24,7 +24,6 @@ import org.springframework.data.domain.Slice; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; -import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @@ -68,19 +67,12 @@ public Pageable processPage(Pageable pageable, Slice page, boo } private boolean cleanUpPath(HousekeepingPath housekeepingPath) { - try { - if (S3PathValidator.validTablePath(housekeepingPath.getPath())) { - pathCleaner.cleanupPath(housekeepingPath); - return true; - } - log.warn("Will not clean up path \"{}\" because it is not valid.", housekeepingPath.getPath()); - return false; - } catch (BeekeeperException e) { - updateStatus(housekeepingPath, HousekeepingStatus.SKIPPED); - log.warn("Skipping cleanup for table \"{}.{}\": {}", housekeepingPath.getDatabaseName(), - housekeepingPath.getTableName(), e.getMessage()); - return false; + if (S3PathValidator.validTablePath(housekeepingPath.getPath())) { + pathCleaner.cleanupPath(housekeepingPath); + return true; } + log.warn("Will not clean up path \"{}\" because it is not valid.", housekeepingPath.getPath()); + return false; } private void cleanupContent(HousekeepingPath housekeepingPath) { @@ -107,4 +99,4 @@ private void updateStatus(HousekeepingPath housekeepingPath, HousekeepingStatus housekeepingPath.setHousekeepingStatus(status); housekeepingPathRepository.save(housekeepingPath); } -} +} \ No newline at end of file diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java index 4db85925..967549b5 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2021 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,6 @@ import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; import com.expediagroup.beekeeper.path.cleanup.service.PagingPathCleanupService; import com.expediagroup.beekeeper.path.cleanup.service.PathRepositoryCleanupService; @@ -60,7 +59,6 @@ class CommonBeansTest { private final CommonBeans commonBeans = new CommonBeans(); private @Mock HousekeepingPathRepository repository; private @Mock BytesDeletedReporter bytesDeletedReporter; - private @Mock IcebergValidator icebergValidator; @BeforeEach void setUp() { @@ -102,7 +100,7 @@ void verifyS3pathCleaner() { S3Client s3Client = commonBeans.s3Client(commonBeans.amazonS3(), dryRunEnabled); MeterRegistry meterRegistry = mock(GraphiteMeterRegistry.class); - PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, bytesDeletedReporter, icebergValidator); + PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, bytesDeletedReporter); assertThat(pathCleaner).isInstanceOf(S3PathCleaner.class); } @@ -123,4 +121,4 @@ public void verifyDisableTablesService() { DisableTablesService disableTablesService = commonBeans.disableTablesService(); assertThat(disableTablesService).isNotNull(); } -} +} \ No newline at end of file