From d39ec1101992b4d54a5e327d37a57c080c97d4fe Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 11 Nov 2024 00:42:33 +0000 Subject: [PATCH 1/8] Initial Commit to fix actions on iceberg tables --- ...etadataSchedulerApiaryIntegrationTest.java | 72 +++++++++-- ...cedPathSchedulerApiaryIntegrationTest.java | 73 +++++++++--- .../integration/model/SqsMessage.java | 8 +- .../integration/model/SqsMessageTest.java | 12 ++ .../IcebergTableListenerEventFilter.java | 54 +++++++++ .../apiary/service/SchedulerApiary.java | 69 +++++++++-- .../IcebergTableListenerEventFilterTest.java | 112 ++++++++++++++++++ .../apiary/service/SchedulerApiaryTest.java | 29 ++++- .../predicate/IsIcebergTablePredicate.java | 38 ++++++ .../IsIcebergTablePredicateTest.java | 57 +++++++++ 10 files changed, 483 insertions(+), 41 deletions(-) create mode 100644 beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java create mode 100644 beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java create mode 100644 beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java create mode 100644 beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index ccbf19c6..3e888084 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ 
b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2023 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,7 +126,8 @@ public void expiredMetadataCreateTableEvent() throws SQLException, IOException, await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null, true); + // assertMetrics() accepts a boolean value now so we can verify if metadata-scheduled is not present } @Test @@ -139,7 +140,7 @@ public void expiredMetadataAlterTableEvent() throws SQLException, IOException, U await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null, true); } @Test @@ -156,7 +157,7 @@ public void expiredMetadataAddPartitionEvent() throws SQLException, IOException, List expiredMetadata = getExpiredMetadata(); // check first entry is for the table assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME, true); } @Test @@ -176,8 +177,8 @@ public void expiredMetadataMultipleAddPartitionEvents() throws SQLException, IOE List expiredMetadata = getExpiredMetadata(); // check first entry is for the table assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null); - assertExpiredMetadata(expiredMetadata.get(1), 
LOCATION_A, PARTITION_A_NAME); - assertExpiredMetadata(expiredMetadata.get(2), LOCATION_B, PARTITION_B_NAME); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME, true); + assertExpiredMetadata(expiredMetadata.get(2), LOCATION_B, PARTITION_B_NAME, true); } @Test @@ -191,7 +192,7 @@ public void expiredMetadataAlterPartitionTableEvent() throws SQLException, IOExc await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME, true); } @Test @@ -209,8 +210,26 @@ public void expiredMetadataMultipleAlterPartitionTableEvents() throws SQLExcepti await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 2); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, PARTITION_B_NAME); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME, true); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, PARTITION_B_NAME, true); + } + + @Test + public void expiredMetadataIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { + //create a message for an Iceberg table by including table_type=ICEBERG in the payload + CreateTableSqsMessage createIcebergTableSqsMessage = new CreateTableSqsMessage(LOCATION_A, true); + createIcebergTableSqsMessage.setTableType("ICEBERG"); + amazonSQS.sendMessage(sendMessageRequest(createIcebergTableSqsMessage.getFormattedString())); + // wait for SchedulerApiary to process message + await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 0); + // asserts that no expired metadata was scheduled + List expiredMetadata = getExpiredMetadata(); + 
assertThat(expiredMetadata).isEmpty(); + // verify metrics (updated assertMetrics) below + assertMetrics(false); + // assert the event was deleted from the queue + int queueSize = getSqsQueueSize(); + assertThat(queueSize).isEqualTo(0); } @Test @@ -233,9 +252,9 @@ private SendMessageRequest sendMessageRequest(String payload) { return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); } - private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName) { + private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName, boolean expectScheduledExpiredMetric) { assertHousekeepingMetadata(actual, expectedPath, partitionName); - assertMetrics(); + assertMetrics(expectScheduledExpiredMetric); } public void assertHousekeepingMetadata( @@ -256,13 +275,40 @@ public void assertHousekeepingMetadata( assertThat(actual.getLifecycleType()).isEqualTo(EXPIRED.toString()); } - public void assertMetrics() { + /** + * Previously, assertExpiredMetadata didn't differentiate between whether specific metrics (e.g., metadata-scheduled) were expected to be present or not + * Adding boolean param allows us to check if SCHEDULED_EXPIRED_METRIC exists + * This allows me to check if the metadata-scheduled is present in expiredMetadataIcebergTableEventIsFiltered test. 
+ */ + public void assertMetrics(boolean expectScheduledExpiredMetric) { Set meterRegistry = ((CompositeMeterRegistry) BeekeeperSchedulerApiary.meterRegistry()) .getRegistries(); assertThat(meterRegistry).hasSize(2); meterRegistry.forEach(registry -> { List meters = registry.getMeters(); - assertThat(meters).extracting("id", Meter.Id.class).extracting("name").contains(SCHEDULED_EXPIRED_METRIC); + if (expectScheduledExpiredMetric) { + assertThat(meters).extracting("id", Meter.Id.class) + .extracting("name") + .contains(SCHEDULED_EXPIRED_METRIC); + } else { + assertThat(meters).extracting("id", Meter.Id.class) + .extracting("name") + .doesNotContain(SCHEDULED_EXPIRED_METRIC); + } }); } + + // retrieves the current number of messages to check if the event has been added to the SQS queue or successfully ignored + private int getSqsQueueSize() { + String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); + // fetch the number of messages + String approximateNumberOfMessages = amazonSQS.getQueueAttributes(queueUrl, List.of("ApproximateNumberOfMessages")) + .getAttributes() + .get("ApproximateNumberOfMessages"); + + //return the count as an integer + return approximateNumberOfMessages != null && !approximateNumberOfMessages.isEmpty() + ? 
Integer.parseInt(approximateNumberOfMessages) + : 0; + } } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index 92a3d0c0..4a831f36 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2023 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,6 +61,7 @@ import com.expediagroup.beekeeper.core.model.PeriodDuration; import com.expediagroup.beekeeper.integration.model.AlterPartitionSqsMessage; import com.expediagroup.beekeeper.integration.model.AlterTableSqsMessage; +//import com.expediagroup.beekeeper.integration.model.CreateTableSqsMessage; import com.expediagroup.beekeeper.integration.model.DropPartitionSqsMessage; import com.expediagroup.beekeeper.integration.model.DropTableSqsMessage; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; @@ -68,8 +69,9 @@ @Testcontainers public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase { - - private static final int TIMEOUT = 5; + // changes similar to BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest + private static final int TIMEOUT = 30; + // updated to match BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest, asynchronous operations so 5 seconds might not be enough private static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url"; private static 
final String QUEUE = "apiary-receiver-queue"; @@ -118,7 +120,7 @@ public void unreferencedAlterTableEvent() throws SQLException, IOException, URIS await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 1); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation", true); } @Test @@ -132,8 +134,8 @@ public void unreferencedMultipleAlterTableEvents() throws SQLException, IOExcept await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation", true); } @Test @@ -149,8 +151,8 @@ public void unreferencedAlterPartitionEvent() throws SQLException, IOException, await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation", true); } @Test @@ -165,8 +167,8 @@ public void unreferencedMultipleAlterPartitionEvent() throws IOException, SQLExc await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - 
assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation", true); } @Test @@ -179,8 +181,8 @@ public void unreferencedDropPartitionEvent() throws SQLException, IOException, U await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/partitionLocation2"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/partitionLocation2", true); } @Test @@ -192,8 +194,24 @@ public void unreferencedDropTableEvent() throws SQLException, IOException, URISy await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/tableLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation2"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/tableLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation2", true); + } + + @Test + public void unreferencedIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { + DropTableSqsMessage dropIcebergTableSqsMessage = new DropTableSqsMessage("s3://bucket/icebergTableLocation", true, true); + dropIcebergTableSqsMessage.setTableType("ICEBERG"); + 
amazonSQS.sendMessage(sendMessageRequest(dropIcebergTableSqsMessage.getFormattedString())); + + await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 0); + + List unreferencedPaths = getUnreferencedPaths(); + assertThat(unreferencedPaths).isEmpty(); + assertMetrics(false); + + int queueSize = getSqsQueueSize(); + assertThat(queueSize).isEqualTo(0); } @Test @@ -216,9 +234,9 @@ private SendMessageRequest sendMessageRequest(String payload) { return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); } - private void assertUnreferencedPath(HousekeepingPath actual, String expectedPath) { + private void assertUnreferencedPath(HousekeepingPath actual, String expectedPath, boolean expectScheduledUnreferencedMetric) { assertHousekeepingEntity(actual, expectedPath); - assertMetrics(); + assertMetrics(expectScheduledUnreferencedMetric); } public void assertHousekeepingEntity(HousekeepingPath actual, String expectedPath) { @@ -235,13 +253,32 @@ public void assertHousekeepingEntity(HousekeepingPath actual, String expectedPat assertThat(actual.getLifecycleType()).isEqualTo(UNREFERENCED.toString()); } - public void assertMetrics() { + public void assertMetrics(boolean expectScheduledUnreferencedMetric) { Set meterRegistry = ((CompositeMeterRegistry) BeekeeperSchedulerApiary.meterRegistry()) .getRegistries(); assertThat(meterRegistry).hasSize(2); meterRegistry.forEach(registry -> { List meters = registry.getMeters(); - assertThat(meters).extracting("id", Meter.Id.class).extracting("name").contains(SCHEDULED_ORPHANED_METRIC); + if (expectScheduledUnreferencedMetric) { + assertThat(meters).extracting("id", Meter.Id.class) + .extracting("name") + .contains(SCHEDULED_ORPHANED_METRIC); + } else { + assertThat(meters).extracting("id", Meter.Id.class) + .extracting("name") + .doesNotContain(SCHEDULED_ORPHANED_METRIC); + } }); } + + private int getSqsQueueSize() { + String queueUrl = 
ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); + String approximateNumberOfMessages = amazonSQS.getQueueAttributes(queueUrl, List.of("ApproximateNumberOfMessages")) + .getAttributes() + .get("ApproximateNumberOfMessages"); + + return approximateNumberOfMessages != null && !approximateNumberOfMessages.isEmpty() + ? Integer.parseInt(approximateNumberOfMessages) + : 0; + } } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java index c51b2513..08d6a09a 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,4 +107,10 @@ public final String getFormattedString() { public JsonObject getApiaryEventMessageJsonObject() { return apiaryEventMessageJsonObject; } + + //enable the setting of the table_type parameter in SQS messages, to allow tests to simulate events for Iceberg/non-Iceberg tables. 
+ public void setTableType(String tableType) { + JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); + tableParameters.add("table_type", new JsonPrimitive(tableType)); + } } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java index 2b2f0738..3f4b8077 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java @@ -107,4 +107,16 @@ private void assertKeys(SqsMessage sqsMessage, Set specificKeys, String assertThat(object.get("eventType").getAsString()).isEqualTo(eventType); assertThat(object.keySet()).isEqualTo(mergedSet); } + + @Test + public void testSetTableType() throws IOException, URISyntaxException { + CreateTableSqsMessage message = new CreateTableSqsMessage(DUMMY_LOCATION, true); + + message.setTableType("ICEBERG"); + + JsonObject object = message.getApiaryEventMessageJsonObject(); + JsonObject tableParameters = object.getAsJsonObject("tableParameters"); + + assertThat(tableParameters.get("table_type").getAsString()).isEqualTo("ICEBERG"); + } } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java new file mode 100644 index 00000000..5d4bbfa4 --- /dev/null +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -0,0 +1,54 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.expediagroup.beekeeper.scheduler.apiary.filter; + +import java.util.Map; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; + +import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.service.predicate.IsIcebergTablePredicate; + +// Class to intercept and filter events received and determine whether they should be processed or not +@Component +public class IcebergTableListenerEventFilter implements ListenerEventFilter { + + private static final Logger log = LogManager.getLogger(IcebergTableListenerEventFilter.class); + + private final IsIcebergTablePredicate isIcebergTablePredicate; + + @Autowired + public IcebergTableListenerEventFilter(IsIcebergTablePredicate predicate) { + this.isIcebergTablePredicate = predicate; + // inject and assign predicate + } + + @Override + public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { + Map tableParameters = event.getTableParameters(); + if (isIcebergTablePredicate.test(tableParameters)) { + log.info("Ignoring Iceberg table '{}.{}'.", event.getDbName(), event.getTableName()); + // Logging added as per ticket + return true; + } + return false; + } +} diff --git 
a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index 4cc1e165..9560a48a 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,35 +26,76 @@ import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; +import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; +import com.expedia.apiary.extensions.receiver.common.messaging.MessageEvent; + import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.SchedulerService; +// class scheduling housekeeping events based on Beekeeper events + +/** + * original flow of the class: + * read beekeeperevent → for every housekeepingentity → determine lifecycleeventtype → schedule housekeeping → delete event + * + * updated flow of the class: + * read beekeeperevent → extract messageevent → extract listenerevent → determine lifecycleeventtype → + * if iceberg table → ignore & delete event + * else → for every housekeepingentity → 
determine lifecycleeventtype → schedule housekeeping → delete event + * + */ + @Component public class SchedulerApiary { private final BeekeeperEventReader beekeeperEventReader; private final EnumMap schedulerServiceMap; + private final IcebergTableListenerEventFilter icebergTableListenerEventFilter; @Autowired public SchedulerApiary( BeekeeperEventReader beekeeperEventReader, - EnumMap schedulerServiceMap + EnumMap schedulerServiceMap, + IcebergTableListenerEventFilter icebergTableListenerEventFilter ) { this.beekeeperEventReader = beekeeperEventReader; this.schedulerServiceMap = schedulerServiceMap; + this.icebergTableListenerEventFilter = icebergTableListenerEventFilter; } @Transactional public void scheduleBeekeeperEvent() { - Optional housekeepingEntitiesToBeScheduled = beekeeperEventReader.read(); - if (housekeepingEntitiesToBeScheduled.isEmpty()) { return; } - BeekeeperEvent beekeeperEvent = housekeepingEntitiesToBeScheduled.get(); + Optional beekeeperEventOptional = beekeeperEventReader.read(); + if (beekeeperEventOptional.isEmpty()) { + return; + } + + // extract the messageEvent from beekeeperEvent so we can extract ListenerEvent + // to provide information about the event inc table params to pass to icebergTableListenerEventFilter + BeekeeperEvent beekeeperEvent = beekeeperEventOptional.get(); + MessageEvent messageEvent = beekeeperEvent.getMessageEvent(); + ListenerEvent listenerEvent = messageEvent.getEvent(); + List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); + // we didn't check if housekeepingEntities was empty before, might lead to silent failures? 
+ if (housekeepingEntities.isEmpty()) { + throw new BeekeeperException("No housekeeping entities found in the event"); + } + LifecycleEventType lifecycleEventType = LifecycleEventType.valueOf(housekeepingEntities.get(0).getLifecycleType()); + + // apply Iceberg table filter + if (icebergTableListenerEventFilter.isFiltered(listenerEvent, lifecycleEventType)) { + // delete event and skip processing + beekeeperEventReader.delete(beekeeperEvent); + return; + } + for (HousekeepingEntity entity : housekeepingEntities) { try { LifecycleEventType eventType = LifecycleEventType.valueOf(entity.getLifecycleType()); @@ -63,14 +104,28 @@ public void scheduleBeekeeperEvent() { } catch (Exception e) { throw new BeekeeperException(format( "Unable to schedule %s deletion for entity, this message will go back on the queue", - entity.getLifecycleType()), - e); + entity.getLifecycleType()), e); } } beekeeperEventReader.delete(beekeeperEvent); } + /** + * Was thinking I can extract some of the logic into this to simplify main method, thoughts? 
+ * + * private LifecycleEventType getLifecycleEventType(BeekeeperEvent beekeeperEvent) { + * List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); + * if (!housekeepingEntities.isEmpty()) { + * String lifecycleType = housekeepingEntities.get(0).getLifecycleType(); + * return LifecycleEventType.valueOf(lifecycleType); + * } + * // Handle the case where there are no housekeeping entities + * throw new BeekeeperException("No housekeeping entities found in the event"); + * } + * + */ + public void close() throws IOException { beekeeperEventReader.close(); } diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java new file mode 100644 index 00000000..9645a6f5 --- /dev/null +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java @@ -0,0 +1,112 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.expediagroup.beekeeper.scheduler.apiary.filter; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Map; +import java.util.HashMap; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; + +import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.service.predicate.IsIcebergTablePredicate; + +// class to test the IcebergTableListenerEventFilter +public class IcebergTableListenerEventFilterTest { + + private IcebergTableListenerEventFilter filter; + private IsIcebergTablePredicate predicate; + + @BeforeEach + public void setUp() { + predicate = new IsIcebergTablePredicate(); + filter = new IcebergTableListenerEventFilter(predicate); + } + + @Test + void testIsFilteredIcebergTable() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "ICEBERG"); + + ListenerEvent listenerEvent = createListenerEvent("database", "iceberg_table", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertTrue(result, "Iceberg tables should be filtered out."); + } + + @Test + void testIsFilteredNonIcebergTable() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "MANAGED"); + + ListenerEvent listenerEvent = createListenerEvent("database", "hive_table", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertFalse(result, "Non-Iceberg tables should not be filtered out."); + } + + @Test + void testIsFilteredNullTableType() { + Map tableParameters = new HashMap<>(); + // we don't add table_type param + + ListenerEvent listenerEvent = 
createListenerEvent("database", "table_without_type", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertFalse(result, "Tables without 'table_type' should not be filtered out."); + } + + @Test + void testIsFilteredNullTableParameters() { + ListenerEvent listenerEvent = createListenerEvent("database", "table_without_parameters", null); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertFalse(result, "Tables with null parameters should not be filtered out."); + } + + // helper method to create a ListenerEvent + private ListenerEvent createListenerEvent(String dbName, String tableName, Map tableParameters) { + return new ListenerEvent() { + @Override + public String getDbName() { + return dbName; + } + + @Override + public String getTableName() { + return tableName; + } + + @Override + public Map getTableParameters() { + return tableParameters; + } + }; + } +} diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 31f48702..2d5dfcbc 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,6 +47,7 @@ import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.ExpiredHousekeepingMetadataSchedulerService; @@ -61,6 +62,7 @@ public class SchedulerApiaryTest { @Mock private BeekeeperEventReader beekeeperEventReader; @Mock private HousekeepingPath path; @Mock private HousekeepingMetadata table; + @Mock private IcebergTableListenerEventFilter icebergTableListenerEventFilter; private SchedulerApiary scheduler; @@ -69,14 +71,17 @@ public void init() { EnumMap schedulerMap = new EnumMap<>(LifecycleEventType.class); schedulerMap.put(UNREFERENCED, pathSchedulerService); schedulerMap.put(EXPIRED, tableSchedulerService); - scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap); + scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap, icebergTableListenerEventFilter); } @Test public void typicalPathSchedule() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); + scheduler.scheduleBeekeeperEvent(); + verify(pathSchedulerService).scheduleForHousekeeping(path); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -86,16 +91,34 @@ public void typicalPathSchedule() { public void typicalTableSchedule() { Optional event = Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); + 
scheduler.scheduleBeekeeperEvent(); + verify(tableSchedulerService).scheduleForHousekeeping(table); verifyNoInteractions(pathSchedulerService); verify(beekeeperEventReader).delete(event.get()); } + @Test + public void icebergTableEventIsFiltered() { + HousekeepingEntity entity = path; // or table, as appropriate + Optional event = Optional.of(newHousekeepingEvent(entity, UNREFERENCED)); + when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(true); + + scheduler.scheduleBeekeeperEvent(); + + verifyNoInteractions(pathSchedulerService); + verifyNoInteractions(tableSchedulerService); + verify(beekeeperEventReader).delete(event.get()); + } + @Test public void typicalNoSchedule() { when(beekeeperEventReader.read()).thenReturn(Optional.empty()); scheduler.scheduleBeekeeperEvent(); + verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader, times(0)).delete(any()); @@ -105,6 +128,7 @@ public void typicalNoSchedule() { public void housekeepingPathRepositoryThrowsException() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); doThrow(new BeekeeperException("exception")).when(pathSchedulerService).scheduleForHousekeeping(path); try { @@ -125,6 +149,7 @@ public void housekeepingPathRepositoryThrowsException() { public void housekeepingTableRepositoryThrowsException() { Optional event = Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); doThrow(new BeekeeperException("exception")).when(tableSchedulerService).scheduleForHousekeeping(table); try { diff --git 
a/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java b/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java new file mode 100644 index 00000000..c4ea25a3 --- /dev/null +++ b/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java @@ -0,0 +1,38 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.expediagroup.beekeeper.scheduler.service.predicate; + +import java.util.Map; +import java.util.function.Predicate; + +import org.springframework.stereotype.Component; + +// class to determine if a table is an iceberg table based on `table_type` parameter +@Component +public class IsIcebergTablePredicate implements Predicate> { + + private static final String TABLE_TYPE_KEY = "table_type"; + private static final String TABLE_TYPE_ICEBERG_VALUE = "ICEBERG"; + + @Override + public boolean test(Map tableParameters) { + if (tableParameters != null) { + String tableType = tableParameters.get(TABLE_TYPE_KEY); + return TABLE_TYPE_ICEBERG_VALUE.equalsIgnoreCase(tableType); + } + return false; + } +} diff --git a/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java b/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java new file mode 100644 index 00000000..eed45e24 --- /dev/null +++ b/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java @@ -0,0 +1,57 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.expediagroup.beekeeper.scheduler.service.predicate; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.junit.jupiter.api.Test; + +class IsIcebergTablePredicateTest { + + private IsIcebergTablePredicate predicate = new IsIcebergTablePredicate(); + + @Test + void testIsIcebergTableByTableType() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "ICEBERG"); + + assertThat(predicate.test(tableParameters)).isTrue(); + } + + @Test + void testIsNotIcebergTable() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "EXTERNAL"); + + assertThat(predicate.test(tableParameters)).isFalse(); + } + + @Test + void testNullParameters() { + assertThat(predicate.test(null)).isFalse(); + } + + @Test + void testEmptyParameters() { + Map tableParameters = Collections.emptyMap(); + + assertThat(predicate.test(tableParameters)).isFalse(); + } +} From f65f115c0cc1b6a8eb5e3aa9dec60d410ed3d22e Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 11 Nov 2024 14:27:56 +0000 Subject: [PATCH 2/8] Update to operate on a Table object rather than a map representing table params --- ...etadataSchedulerApiaryIntegrationTest.java | 2 + ...cedPathSchedulerApiaryIntegrationTest.java | 2 +- .../integration/model/SqsMessage.java | 18 ++- .../integration/model/SqsMessageTest.java | 54 +++++-- beekeeper-scheduler-apiary/pom.xml | 5 + .../IcebergTableListenerEventFilter.java | 29 +++- .../apiary/service/SchedulerApiary.java | 15 -- .../IcebergTableListenerEventFilterTest.java | 135 ++++++++++++++++-- beekeeper-scheduler/pom.xml | 59 ++++++++ .../predicate/IsIcebergTablePredicate.java | 32 ++++- .../IsIcebergTablePredicateTest.java | 91 ++++++++++-- 11 files changed, 381 insertions(+), 61 deletions(-) diff --git 
a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index 3e888084..e39387bd 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -214,11 +214,13 @@ public void expiredMetadataMultipleAlterPartitionTableEvents() throws SQLExcepti assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, PARTITION_B_NAME, true); } + // New test to check if expired metadata for Iceberg tables is filtered @Test public void expiredMetadataIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { //create a message for an Iceberg table by including table_type=ICEBERG in the payload CreateTableSqsMessage createIcebergTableSqsMessage = new CreateTableSqsMessage(LOCATION_A, true); createIcebergTableSqsMessage.setTableType("ICEBERG"); + createIcebergTableSqsMessage.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); amazonSQS.sendMessage(sendMessageRequest(createIcebergTableSqsMessage.getFormattedString())); // wait for SchedulerApiary to process message await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 0); diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index 4a831f36..7f2904ad 100644 --- 
a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -61,7 +61,6 @@ import com.expediagroup.beekeeper.core.model.PeriodDuration; import com.expediagroup.beekeeper.integration.model.AlterPartitionSqsMessage; import com.expediagroup.beekeeper.integration.model.AlterTableSqsMessage; -//import com.expediagroup.beekeeper.integration.model.CreateTableSqsMessage; import com.expediagroup.beekeeper.integration.model.DropPartitionSqsMessage; import com.expediagroup.beekeeper.integration.model.DropTableSqsMessage; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; @@ -202,6 +201,7 @@ public void unreferencedDropTableEvent() throws SQLException, IOException, URISy public void unreferencedIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { DropTableSqsMessage dropIcebergTableSqsMessage = new DropTableSqsMessage("s3://bucket/icebergTableLocation", true, true); dropIcebergTableSqsMessage.setTableType("ICEBERG"); + dropIcebergTableSqsMessage.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); amazonSQS.sendMessage(sendMessageRequest(dropIcebergTableSqsMessage.getFormattedString())); await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 0); diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java index 08d6a09a..cd1dc37e 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java @@ -99,6 +99,18 @@ public void 
setWhitelisted(boolean isWhitelisted) { tableParameters.add(BEEKEEPER_HIVE_EVENT_WHITELIST, new JsonPrimitive(whitelist)); } + //enable the setting of the table_type parameter in SQS messages, to allow tests to simulate events for Iceberg/non-Iceberg tables. + public void setTableType(String tableType) { + JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); + tableParameters.add("table_type", new JsonPrimitive(tableType)); + } + + // New method to set output_format + public void setOutputFormat(String outputFormat) { + JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); + tableParameters.add("output_format", new JsonPrimitive(outputFormat)); + } + public final String getFormattedString() { apiaryEventJsonObject.add(APIARY_EVENT_MESSAGE_KEY, new JsonPrimitive(apiaryEventMessageJsonObject.toString())); return apiaryEventJsonObject.toString(); @@ -107,10 +119,4 @@ public final String getFormattedString() { public JsonObject getApiaryEventMessageJsonObject() { return apiaryEventMessageJsonObject; } - - //enable the setting of the table_type parameter in SQS messages, to allow tests to simulate events for Iceberg/non-Iceberg tables. 
- public void setTableType(String tableType) { - JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); - tableParameters.add("table_type", new JsonPrimitive(tableType)); - } } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java index 3f4b8077..23095658 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,14 @@ public class SqsMessageTest { - private static final Set COMMON_KEYS = Set.of("protocolVersion", "eventType", "tableParameters", - "dbName", "tableName", "tableLocation"); + private static final Set COMMON_KEYS = Set.of( + "protocolVersion", + "eventType", + "tableParameters", + "dbName", + "tableName", + "tableLocation" + ); @Test public void testCreateTableFormat() throws IOException, URISyntaxException { @@ -50,9 +56,14 @@ public void testAddPartitionFormat() throws IOException, URISyntaxException { "partitionKeys", "partitionValues", "partitionLocation", - "tableParameters"); - AddPartitionSqsMessage message = new AddPartitionSqsMessage(DUMMY_LOCATION, DUMMY_PARTITION_KEYS, - DUMMY_PARTITION_VALUES, true); + "tableParameters" + ); + AddPartitionSqsMessage message = new AddPartitionSqsMessage( + DUMMY_LOCATION, + DUMMY_PARTITION_KEYS, + DUMMY_PARTITION_VALUES, + true + ); assertKeys(message, specificKeys, "ADD_PARTITION"); } @@ -63,9 +74,14 @@ public void testAlterPartitionFormat() throws IOException, 
URISyntaxException { "partitionValues", "partitionLocation", "oldPartitionValues", - "oldPartitionLocation"); - AlterPartitionSqsMessage message = new AlterPartitionSqsMessage(DUMMY_LOCATION, DUMMY_PARTITION_KEYS, - DUMMY_PARTITION_VALUES, true); + "oldPartitionLocation" + ); + AlterPartitionSqsMessage message = new AlterPartitionSqsMessage( + DUMMY_LOCATION, + DUMMY_PARTITION_KEYS, + DUMMY_PARTITION_VALUES, + true + ); assertKeys(message, specificKeys, "ALTER_PARTITION"); } @@ -73,7 +89,8 @@ public void testAlterPartitionFormat() throws IOException, URISyntaxException { public void testAlterTableFormat() throws IOException, URISyntaxException { Set specificKeys = Set.of( "oldTableName", - "oldTableLocation"); + "oldTableLocation" + ); AlterTableSqsMessage message = new AlterTableSqsMessage(DUMMY_LOCATION, true); assertKeys(message, specificKeys, "ALTER_TABLE"); } @@ -84,7 +101,8 @@ public void testDropPartitionFormat() throws IOException, URISyntaxException { "partitionKeys", "partitionValues", "partitionLocation", - "tableParameters"); + "tableParameters" + ); DropPartitionSqsMessage message = new DropPartitionSqsMessage(DUMMY_LOCATION, true, true); assertKeys(message, specificKeys, "DROP_PARTITION"); } @@ -119,4 +137,18 @@ public void testSetTableType() throws IOException, URISyntaxException { assertThat(tableParameters.get("table_type").getAsString()).isEqualTo("ICEBERG"); } + + //test method to verify setOutputFormat functionality + @Test + public void testSetOutputFormat() throws IOException, URISyntaxException { + CreateTableSqsMessage message = new CreateTableSqsMessage(DUMMY_LOCATION, true); + + String outputFormatValue = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; + message.setOutputFormat(outputFormatValue); + + JsonObject object = message.getApiaryEventMessageJsonObject(); + JsonObject tableParameters = object.getAsJsonObject("tableParameters"); + + assertThat(tableParameters.get("output_format").getAsString()).isEqualTo(outputFormatValue); + 
} } diff --git a/beekeeper-scheduler-apiary/pom.xml b/beekeeper-scheduler-apiary/pom.xml index bc492abf..e916c7e1 100644 --- a/beekeeper-scheduler-apiary/pom.xml +++ b/beekeeper-scheduler-apiary/pom.xml @@ -108,6 +108,11 @@ spring-boot-test test + + junit + junit + test + diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java index 5d4bbfa4..6cc457fd 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -17,6 +17,8 @@ import java.util.Map; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.beans.factory.annotation.Autowired; @@ -40,15 +42,34 @@ public IcebergTableListenerEventFilter(IsIcebergTablePredicate predicate) { this.isIcebergTablePredicate = predicate; // inject and assign predicate } - +// check if the table is an iceberg table and log if it is @Override public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { - Map tableParameters = event.getTableParameters(); - if (isIcebergTablePredicate.test(tableParameters)) { + Table table = createTableFromListenerEvent(event); + if (isIcebergTablePredicate.test(table)) { log.info("Ignoring Iceberg table '{}.{}'.", event.getDbName(), event.getTableName()); - // Logging added as per ticket + // Logging when iceberg table is ignored, as per ticket return true; } return false; } + // create table from listener event to be used in predicate + private Table 
createTableFromListenerEvent(ListenerEvent event) { + // create table from listener event + Table table = new Table(); + table.setDbName(event.getDbName()); + table.setTableName(event.getTableName()); + table.setParameters(event.getTableParameters()); + StorageDescriptor sd = new StorageDescriptor(); + + Map tableParameters = event.getTableParameters(); // retrieve table params and assigns to map + String outputFormat = ""; // initialize output format + if (tableParameters != null) {// if table parameters are not null + outputFormat = tableParameters.getOrDefault("output_format", "");// get output format from table parameters + } + sd.setOutputFormat(outputFormat); // set output format in storafe descriptor + table.setSd(sd); // attach storage descriptor to table + + return table; + } } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index 9560a48a..b4904ae8 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -111,21 +111,6 @@ public void scheduleBeekeeperEvent() { beekeeperEventReader.delete(beekeeperEvent); } - /** - * Was thinking I can extract some of the logic into this to simplify main method, thoughts? 
- * - * private LifecycleEventType getLifecycleEventType(BeekeeperEvent beekeeperEvent) { - * List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); - * if (!housekeepingEntities.isEmpty()) { - * String lifecycleType = housekeepingEntities.get(0).getLifecycleType(); - * return LifecycleEventType.valueOf(lifecycleType); - * } - * // Handle the case where there are no housekeeping entities - * throw new BeekeeperException("No housekeeping entities found in the event"); - * } - * - */ - public void close() throws IOException { beekeeperEventReader.close(); } diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java index 9645a6f5..29fe13e0 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java @@ -15,11 +15,10 @@ */ package com.expediagroup.beekeeper.scheduler.apiary.filter; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; -import java.util.Map; import java.util.HashMap; +import java.util.Map; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -51,33 +50,32 @@ void testIsFilteredIcebergTable() { boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); - assertTrue(result, "Iceberg tables should be filtered out."); + assertThat(result).isTrue(); } @Test void testIsFilteredNonIcebergTable() { Map tableParameters = new HashMap<>(); - tableParameters.put("table_type", "MANAGED"); + tableParameters.put("table_type", "NICEBERG"); - ListenerEvent 
listenerEvent = createListenerEvent("database", "hive_table", tableParameters); + ListenerEvent listenerEvent = createListenerEvent("database", "niceberg_table", tableParameters); LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); - assertFalse(result, "Non-Iceberg tables should not be filtered out."); + assertThat(result).isFalse(); } @Test void testIsFilteredNullTableType() { Map tableParameters = new HashMap<>(); - // we don't add table_type param ListenerEvent listenerEvent = createListenerEvent("database", "table_without_type", tableParameters); LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); - assertFalse(result, "Tables without 'table_type' should not be filtered out."); + assertThat(result).isFalse(); } @Test @@ -87,7 +85,124 @@ void testIsFilteredNullTableParameters() { boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); - assertFalse(result, "Tables with null parameters should not be filtered out."); + assertThat(result).isFalse(); + } + + @Test + void testIsFilteredIcebergTableWithBothParameters() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "ICEBERG"); + tableParameters.put("output_format", "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + + ListenerEvent listenerEvent = createListenerEvent("database", "iceberg_table_both", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isTrue(); + } + + @Test + void testIsFilteredNonIcebergTableWithDifferentOutputFormat() { + Map tableParameters = new HashMap<>(); + tableParameters.put("output_format", "org.apache.not.an.ice.berg.table"); + + ListenerEvent listenerEvent = createListenerEvent("database", "non_iceberg_table_output_format", 
tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isFalse(); + } + + @Test + void testIsFilteredNonIcebergTableWithDifferentTableType() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "NICEBERG"); + + ListenerEvent listenerEvent = createListenerEvent("database", "non_iceberg_table_type", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isFalse(); + } + + @Test + void testIsFilteredWithIcebergOutputFormatAndDifferentTableType() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "NICEBERG"); + tableParameters.put("output_format", "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + + ListenerEvent listenerEvent = createListenerEvent("database", "iceberg_output_non_iceberg_type", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isTrue(); + } + + @Test + void testIsFilteredTableWithIcebergTableTypeAndDifferentOutputFormat() { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "ICEBERG"); + tableParameters.put("output_format", "org.apache.not.an.ice.berg.table"); + + ListenerEvent listenerEvent = createListenerEvent("database", "iceberg_type_non_iceberg_output", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isTrue(); + } + + @Test + void testIsFilteredTableWithNullOutputFormat() { + Map tableParameters = new HashMap<>(); + tableParameters.put("output_format", null); // Explicitly setting output_format to null + + ListenerEvent listenerEvent = 
createListenerEvent("database", "table_null_output_format", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isFalse(); + } + + @Test + void testIsFilteredNonIcebergTableWithOutputFormatContainingIceberg() { + Map tableParameters = new HashMap<>(); + tableParameters.put("output_format", "org.apache.iceberg.mr.hive.NonIcebergOutputFormat"); // Contains "iceberg" but not Iceberg-specific + + ListenerEvent listenerEvent = createListenerEvent("database", "non_iceberg_with_iceberg_in_output_format", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isTrue(); + } + + @Test + void testIsFilteredTableWithNoRelevantParameters() { + Map tableParameters = new HashMap<>(); + tableParameters.put("some_other_param", "some_value"); + + ListenerEvent listenerEvent = createListenerEvent("database", "table_no_relevant_params", tableParameters); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isFalse(); + } + + @Test + void testIsFilteredTableWithNullParametersAndStorageDescriptor() { + ListenerEvent listenerEvent = createListenerEvent("database", "table_null_parameters", null); + LifecycleEventType lifecycleEventType = LifecycleEventType.EXPIRED; + + boolean result = filter.isFiltered(listenerEvent, lifecycleEventType); + + assertThat(result).isFalse(); } // helper method to create a ListenerEvent diff --git a/beekeeper-scheduler/pom.xml b/beekeeper-scheduler/pom.xml index ccdaf7bc..e924ab69 100644 --- a/beekeeper-scheduler/pom.xml +++ b/beekeeper-scheduler/pom.xml @@ -10,6 +10,11 @@ beekeeper-scheduler + + 2.3.7 + 1.4.2 + + com.expediagroup @@ -17,6 +22,54 @@ ${project.version} + + + 
org.apache.hive + hive-metastore + ${hive.version} + + + org.apache.hbase + hbase-client + + + org.slf4j + slf4j-log4j12 + + + log4j-slf4j-impl + org.apache.logging.log4j + + + junit + junit + + + org.eclipse.jetty.aggregate + jetty-all + + + org.eclipse.jetty.orbit + javax.servlet + + + javax.servlet + servlet-api + + + + + com.hotels + hcommon-hive-metastore + ${hcommon-hive-metastore.version} + + + net.java.dev.jna + jna + + + + ch.qos.logback logback-core @@ -29,5 +82,11 @@ org.springframework.boot spring-boot-starter-web + + org.projectlombok + lombok + ${lombok.version} + provided + diff --git a/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java b/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java index c4ea25a3..a080eab4 100644 --- a/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java +++ b/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java @@ -15,23 +15,45 @@ */ package com.expediagroup.beekeeper.scheduler.service.predicate; +import static org.apache.commons.lang3.StringUtils.containsIgnoreCase; +import static org.apache.commons.lang3.StringUtils.equalsIgnoreCase; + import java.util.Map; import java.util.function.Predicate; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; import org.springframework.stereotype.Component; +import lombok.NonNull; + // class to determine if a table is an iceberg table based on `table_type` parameter +// based off IsIcbergTablePredicate class used in Icekeeper @Component -public class IsIcebergTablePredicate implements Predicate> { +public class IsIcebergTablePredicate implements Predicate { private static final String TABLE_TYPE_KEY = "table_type"; private static final String TABLE_TYPE_ICEBERG_VALUE 
= "ICEBERG"; @Override - public boolean test(Map tableParameters) { - if (tableParameters != null) { - String tableType = tableParameters.get(TABLE_TYPE_KEY); - return TABLE_TYPE_ICEBERG_VALUE.equalsIgnoreCase(tableType); + public boolean test(@NonNull Table table) { + return (hasSdProperty(table) || hasTableParameter(table)); + } +// check if the table has the output format property set to iceberg + private boolean hasSdProperty(Table table) { + StorageDescriptor sd = table.getSd(); + if (sd != null) { + String tableOutputFormat = sd.getOutputFormat(); + return containsIgnoreCase(tableOutputFormat, "iceberg"); + } + return false; + } +//retrieve the table parameters and check if the table type is ICEBERG + private boolean hasTableParameter(Table table) { + Map parameters = table.getParameters(); + if (parameters != null) { + String tableType = table.getParameters().get(TABLE_TYPE_KEY); + return equalsIgnoreCase(TABLE_TYPE_ICEBERG_VALUE, tableType); } return false; } diff --git a/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java b/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java index eed45e24..bf0f7e77 100644 --- a/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java +++ b/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java @@ -16,11 +16,14 @@ package com.expediagroup.beekeeper.scheduler.service.predicate; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; import org.junit.jupiter.api.Test; class IsIcebergTablePredicateTest { 
@@ -29,29 +32,99 @@ class IsIcebergTablePredicateTest { @Test void testIsIcebergTableByTableType() { + Table table = new Table(); Map tableParameters = new HashMap<>(); tableParameters.put("table_type", "ICEBERG"); + table.setParameters(tableParameters); - assertThat(predicate.test(tableParameters)).isTrue(); + assertThat(predicate.test(table)).isTrue(); } @Test - void testIsNotIcebergTable() { + void testIsIcebergTableByOutputFormat() { + Table table = new Table(); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + table.setSd(sd); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testIsIcebergTableByBoth() { + Table table = new Table(); Map tableParameters = new HashMap<>(); - tableParameters.put("table_type", "EXTERNAL"); + tableParameters.put("table_type", "ICEBERG"); + table.setParameters(tableParameters); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + table.setSd(sd); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testIsNotIcebergTableWithDifferentTableTypeAndOutputFormat() { + Table table = new Table(); + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", "NICEBERG"); + table.setParameters(tableParameters); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.not.an.ice.berg.table"); + table.setSd(sd); + + assertThat(predicate.test(table)).isFalse(); + } + + @Test + void testIsNotIcebergTableWithWrongParameter() { + Table table = new Table(); + table.setParameters(Collections.singletonMap("table_type", "NICEBERG")); - assertThat(predicate.test(tableParameters)).isFalse(); + assertThat(predicate.test(table)).isFalse(); } @Test - void testNullParameters() { - assertThat(predicate.test(null)).isFalse(); + void testIsNotIcebergTableWithWrongStorageDescriptor() { + Table table = new Table(); + StorageDescriptor sd 
= new StorageDescriptor(); + sd.setOutputFormat("org.apache.not.an.ice.berg.table"); + table.setSd(sd); + + assertThat(predicate.test(table)).isFalse(); } @Test - void testEmptyParameters() { - Map tableParameters = Collections.emptyMap(); + void testIsNotIcebergTableWithNoParametersOrSd() { + Table table = new Table(); + + assertThat(predicate.test(table)).isFalse(); + } - assertThat(predicate.test(tableParameters)).isFalse(); + @Test + void testIsIcebergTableWithStorageDescriptorButDifferentTableType() { + Table table = new Table(); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + table.setSd(sd); + table.setParameters(Collections.singletonMap("table_type", "NICEBERG")); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testIsIcebergTableWithTableTypeButDifferentSd() { + Table table = new Table(); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.not.an.ice.berg.table"); + table.setSd(sd); + table.setParameters(Collections.singletonMap("table_type", "ICEBERG")); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testNullTableThrowsException() { + assertThrows(NullPointerException.class, () -> predicate.test(null)); } } From aaca3749769538cc8cad0db82d06bb7d53f03bdf Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 11 Nov 2024 16:27:01 +0000 Subject: [PATCH 3/8] Undo some changes and update events filter class --- ...etadataSchedulerApiaryIntegrationTest.java | 76 ++-------- ...cedPathSchedulerApiaryIntegrationTest.java | 77 +++-------- .../integration/model/SqsMessage.java | 16 +-- .../integration/model/SqsMessageTest.java | 66 ++------- .../scheduler/apiary/context/CommonBeans.java | 9 +- .../IcebergTableListenerEventFilter.java | 58 +++----- .../apiary/service/SchedulerApiary.java | 56 ++------ .../IcebergTableListenerEventFilterTest.java | 7 +- .../apiary/service/SchedulerApiaryTest.java | 31 +---- 
.../predicate/IsIcebergTablePredicate.java | 60 -------- .../IsIcebergTablePredicateTest.java | 130 ------------------ 11 files changed, 84 insertions(+), 502 deletions(-) delete mode 100644 beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java delete mode 100644 beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index e39387bd..5d93f275 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2023 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -126,8 +126,7 @@ public void expiredMetadataCreateTableEvent() throws SQLException, IOException, await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null, true); - // assertMetrics() accepts a boolean value now so we can verify if metadata-scheduled is not present + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null); } @Test @@ -140,7 +139,7 @@ public void expiredMetadataAlterTableEvent() throws SQLException, IOException, U await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null, true); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null); } @Test @@ -157,7 +156,7 @@ public void expiredMetadataAddPartitionEvent() throws SQLException, IOException, List expiredMetadata = getExpiredMetadata(); // check first entry is for the table assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME, true); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME); } @Test @@ -177,8 +176,8 @@ public void expiredMetadataMultipleAddPartitionEvents() throws SQLException, IOE List expiredMetadata = getExpiredMetadata(); // check first entry is for the table assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME, true); - assertExpiredMetadata(expiredMetadata.get(2), LOCATION_B, PARTITION_B_NAME, true); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME); + assertExpiredMetadata(expiredMetadata.get(2), LOCATION_B, PARTITION_B_NAME); } @Test @@ -192,7 +191,7 @@ public void expiredMetadataAlterPartitionTableEvent() throws SQLException, IOExc 
await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME, true); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME); } @Test @@ -210,28 +209,8 @@ public void expiredMetadataMultipleAlterPartitionTableEvents() throws SQLExcepti await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 2); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME, true); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, PARTITION_B_NAME, true); - } - - // New test to check if expired metadata for Iceberg tables is filtered - @Test - public void expiredMetadataIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { - //create a message for an Iceberg table by including table_type=ICEBERG in the payload - CreateTableSqsMessage createIcebergTableSqsMessage = new CreateTableSqsMessage(LOCATION_A, true); - createIcebergTableSqsMessage.setTableType("ICEBERG"); - createIcebergTableSqsMessage.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); - amazonSQS.sendMessage(sendMessageRequest(createIcebergTableSqsMessage.getFormattedString())); - // wait for SchedulerApiary to process message - await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 0); - // asserts that no expired metadata was scheduled - List expiredMetadata = getExpiredMetadata(); - assertThat(expiredMetadata).isEmpty(); - // verify metrics (updated assertMetrics) below - assertMetrics(false); - // assert the event was deleted from the queue - int queueSize = getSqsQueueSize(); - assertThat(queueSize).isEqualTo(0); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, 
PARTITION_B_NAME); } @Test @@ -254,9 +233,9 @@ private SendMessageRequest sendMessageRequest(String payload) { return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); } - private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName, boolean expectScheduledExpiredMetric) { + private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName) { assertHousekeepingMetadata(actual, expectedPath, partitionName); - assertMetrics(expectScheduledExpiredMetric); + assertMetrics(); } public void assertHousekeepingMetadata( @@ -277,40 +256,13 @@ public void assertHousekeepingMetadata( assertThat(actual.getLifecycleType()).isEqualTo(EXPIRED.toString()); } - /** - * Previously, assertExpiredMetadata didn't differentiate between whether specific metrics (e.g., metadata-scheduled) were expected to be present or not - * Adding boolean param allows us to check if SCHEDULED_EXPIRED_METRIC exists - * This allows me to check if the metadata-scheduled is present in expiredMetadataIcebergTableEventIsFiltered test. 
- */ - public void assertMetrics(boolean expectScheduledExpiredMetric) { + public void assertMetrics() { Set meterRegistry = ((CompositeMeterRegistry) BeekeeperSchedulerApiary.meterRegistry()) .getRegistries(); assertThat(meterRegistry).hasSize(2); meterRegistry.forEach(registry -> { List meters = registry.getMeters(); - if (expectScheduledExpiredMetric) { - assertThat(meters).extracting("id", Meter.Id.class) - .extracting("name") - .contains(SCHEDULED_EXPIRED_METRIC); - } else { - assertThat(meters).extracting("id", Meter.Id.class) - .extracting("name") - .doesNotContain(SCHEDULED_EXPIRED_METRIC); - } + assertThat(meters).extracting("id", Meter.Id.class).extracting("name").contains(SCHEDULED_EXPIRED_METRIC); }); } - - // retrieves the current number of messages to check if the event has been added to the SQS queue or successfully ignored - private int getSqsQueueSize() { - String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); - // fetch the number of messages - String approximateNumberOfMessages = amazonSQS.getQueueAttributes(queueUrl, List.of("ApproximateNumberOfMessages")) - .getAttributes() - .get("ApproximateNumberOfMessages"); - - //return the count as an integer - return approximateNumberOfMessages != null && !approximateNumberOfMessages.isEmpty() - ? 
Integer.parseInt(approximateNumberOfMessages) - : 0; - } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index 7f2904ad..2cd0f1e5 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2023 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,9 +68,8 @@ @Testcontainers public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase { - // changes similar to BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest - private static final int TIMEOUT = 30; - // updated to match BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest, asynchronous operations so 5 seconds might not be enough + + private static final int TIMEOUT = 5; private static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url"; private static final String QUEUE = "apiary-receiver-queue"; @@ -119,7 +118,7 @@ public void unreferencedAlterTableEvent() throws SQLException, IOException, URIS await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 1); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation", true); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation"); } @Test @@ -133,8 +132,8 @@ public void 
unreferencedMultipleAlterTableEvents() throws SQLException, IOExcept await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation", true); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation", true); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation"); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation"); } @Test @@ -150,15 +149,15 @@ public void unreferencedAlterPartitionEvent() throws SQLException, IOException, await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation"); } @Test public void unreferencedMultipleAlterPartitionEvent() throws IOException, SQLException, URISyntaxException { List .of(new AlterPartitionSqsMessage("s3://bucket/table/expiredTableLocation", - "s3://bucket/table/partitionLocation", "s3://bucket/table/unreferencedPartitionLocation", true, true), + "s3://bucket/table/partitionLocation", "s3://bucket/table/unreferencedPartitionLocation", true, true), new AlterPartitionSqsMessage("s3://bucket/table/expiredTableLocation2", "s3://bucket/table/partitionLocation2", "s3://bucket/table/partitionLocation", true, true)) .forEach(msg -> amazonSQS.sendMessage(sendMessageRequest(msg.getFormattedString()))); @@ -166,8 +165,8 @@ public void unreferencedMultipleAlterPartitionEvent() throws IOException, SQLExc await().atMost(TIMEOUT, 
TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation"); } @Test @@ -180,8 +179,8 @@ public void unreferencedDropPartitionEvent() throws SQLException, IOException, U await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/partitionLocation2", true); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/partitionLocation2"); } @Test @@ -193,25 +192,8 @@ public void unreferencedDropTableEvent() throws SQLException, IOException, URISy await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/tableLocation", true); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation2", true); - } - - @Test - public void unreferencedIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { - DropTableSqsMessage dropIcebergTableSqsMessage = new DropTableSqsMessage("s3://bucket/icebergTableLocation", true, true); - dropIcebergTableSqsMessage.setTableType("ICEBERG"); - dropIcebergTableSqsMessage.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); - 
amazonSQS.sendMessage(sendMessageRequest(dropIcebergTableSqsMessage.getFormattedString())); - - await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 0); - - List unreferencedPaths = getUnreferencedPaths(); - assertThat(unreferencedPaths).isEmpty(); - assertMetrics(false); - - int queueSize = getSqsQueueSize(); - assertThat(queueSize).isEqualTo(0); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/tableLocation"); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation2"); } @Test @@ -234,9 +216,9 @@ private SendMessageRequest sendMessageRequest(String payload) { return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); } - private void assertUnreferencedPath(HousekeepingPath actual, String expectedPath, boolean expectScheduledUnreferencedMetric) { + private void assertUnreferencedPath(HousekeepingPath actual, String expectedPath) { assertHousekeepingEntity(actual, expectedPath); - assertMetrics(expectScheduledUnreferencedMetric); + assertMetrics(); } public void assertHousekeepingEntity(HousekeepingPath actual, String expectedPath) { @@ -253,32 +235,13 @@ public void assertHousekeepingEntity(HousekeepingPath actual, String expectedPat assertThat(actual.getLifecycleType()).isEqualTo(UNREFERENCED.toString()); } - public void assertMetrics(boolean expectScheduledUnreferencedMetric) { + public void assertMetrics() { Set meterRegistry = ((CompositeMeterRegistry) BeekeeperSchedulerApiary.meterRegistry()) .getRegistries(); assertThat(meterRegistry).hasSize(2); meterRegistry.forEach(registry -> { List meters = registry.getMeters(); - if (expectScheduledUnreferencedMetric) { - assertThat(meters).extracting("id", Meter.Id.class) - .extracting("name") - .contains(SCHEDULED_ORPHANED_METRIC); - } else { - assertThat(meters).extracting("id", Meter.Id.class) - .extracting("name") - .doesNotContain(SCHEDULED_ORPHANED_METRIC); - } + assertThat(meters).extracting("id", 
Meter.Id.class).extracting("name").contains(SCHEDULED_ORPHANED_METRIC); }); } - - private int getSqsQueueSize() { - String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); - String approximateNumberOfMessages = amazonSQS.getQueueAttributes(queueUrl, List.of("ApproximateNumberOfMessages")) - .getAttributes() - .get("ApproximateNumberOfMessages"); - - return approximateNumberOfMessages != null && !approximateNumberOfMessages.isEmpty() - ? Integer.parseInt(approximateNumberOfMessages) - : 0; - } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java index cd1dc37e..b9dea7ff 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2020 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,18 +99,6 @@ public void setWhitelisted(boolean isWhitelisted) { tableParameters.add(BEEKEEPER_HIVE_EVENT_WHITELIST, new JsonPrimitive(whitelist)); } - //enable the setting of the table_type parameter in SQS messages, to allow tests to simulate events for Iceberg/non-Iceberg tables. 
- public void setTableType(String tableType) { - JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); - tableParameters.add("table_type", new JsonPrimitive(tableType)); - } - - // New method to set output_format - public void setOutputFormat(String outputFormat) { - JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); - tableParameters.add("output_format", new JsonPrimitive(outputFormat)); - } - public final String getFormattedString() { apiaryEventJsonObject.add(APIARY_EVENT_MESSAGE_KEY, new JsonPrimitive(apiaryEventMessageJsonObject.toString())); return apiaryEventJsonObject.toString(); @@ -119,4 +107,4 @@ public final String getFormattedString() { public JsonObject getApiaryEventMessageJsonObject() { return apiaryEventMessageJsonObject; } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java index 23095658..f624f0f3 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java @@ -34,14 +34,8 @@ public class SqsMessageTest { - private static final Set COMMON_KEYS = Set.of( - "protocolVersion", - "eventType", - "tableParameters", - "dbName", - "tableName", - "tableLocation" - ); + private static final Set COMMON_KEYS = Set.of("protocolVersion", "eventType", "tableParameters", + "dbName", "tableName", "tableLocation"); @Test public void testCreateTableFormat() throws IOException, URISyntaxException { @@ -56,14 +50,9 @@ public void testAddPartitionFormat() throws IOException, URISyntaxException { "partitionKeys", "partitionValues", "partitionLocation", - "tableParameters" - ); - 
AddPartitionSqsMessage message = new AddPartitionSqsMessage( - DUMMY_LOCATION, - DUMMY_PARTITION_KEYS, - DUMMY_PARTITION_VALUES, - true - ); + "tableParameters"); + AddPartitionSqsMessage message = new AddPartitionSqsMessage(DUMMY_LOCATION, DUMMY_PARTITION_KEYS, + DUMMY_PARTITION_VALUES, true); assertKeys(message, specificKeys, "ADD_PARTITION"); } @@ -74,14 +63,9 @@ public void testAlterPartitionFormat() throws IOException, URISyntaxException { "partitionValues", "partitionLocation", "oldPartitionValues", - "oldPartitionLocation" - ); - AlterPartitionSqsMessage message = new AlterPartitionSqsMessage( - DUMMY_LOCATION, - DUMMY_PARTITION_KEYS, - DUMMY_PARTITION_VALUES, - true - ); + "oldPartitionLocation"); + AlterPartitionSqsMessage message = new AlterPartitionSqsMessage(DUMMY_LOCATION, DUMMY_PARTITION_KEYS, + DUMMY_PARTITION_VALUES, true); assertKeys(message, specificKeys, "ALTER_PARTITION"); } @@ -89,8 +73,7 @@ public void testAlterPartitionFormat() throws IOException, URISyntaxException { public void testAlterTableFormat() throws IOException, URISyntaxException { Set specificKeys = Set.of( "oldTableName", - "oldTableLocation" - ); + "oldTableLocation"); AlterTableSqsMessage message = new AlterTableSqsMessage(DUMMY_LOCATION, true); assertKeys(message, specificKeys, "ALTER_TABLE"); } @@ -101,8 +84,7 @@ public void testDropPartitionFormat() throws IOException, URISyntaxException { "partitionKeys", "partitionValues", "partitionLocation", - "tableParameters" - ); + "tableParameters"); DropPartitionSqsMessage message = new DropPartitionSqsMessage(DUMMY_LOCATION, true, true); assertKeys(message, specificKeys, "DROP_PARTITION"); } @@ -125,30 +107,4 @@ private void assertKeys(SqsMessage sqsMessage, Set specificKeys, String assertThat(object.get("eventType").getAsString()).isEqualTo(eventType); assertThat(object.keySet()).isEqualTo(mergedSet); } - - @Test - public void testSetTableType() throws IOException, URISyntaxException { - CreateTableSqsMessage message = new 
CreateTableSqsMessage(DUMMY_LOCATION, true); - - message.setTableType("ICEBERG"); - - JsonObject object = message.getApiaryEventMessageJsonObject(); - JsonObject tableParameters = object.getAsJsonObject("tableParameters"); - - assertThat(tableParameters.get("table_type").getAsString()).isEqualTo("ICEBERG"); - } - - //test method to verify setOutputFormat functionality - @Test - public void testSetOutputFormat() throws IOException, URISyntaxException { - CreateTableSqsMessage message = new CreateTableSqsMessage(DUMMY_LOCATION, true); - - String outputFormatValue = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; - message.setOutputFormat(outputFormatValue); - - JsonObject object = message.getApiaryEventMessageJsonObject(); - JsonObject tableParameters = object.getAsJsonObject("tableParameters"); - - assertThat(tableParameters.get("output_format").getAsString()).isEqualTo(outputFormatValue); - } -} +} \ No newline at end of file diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java index 492017c6..74fffbab 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,6 +39,7 @@ import com.expediagroup.beekeeper.core.model.LifecycleEventType; import com.expediagroup.beekeeper.scheduler.apiary.filter.EventTypeListenerEventFilter; +import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.ListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.LocationOnlyUpdateListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.TableParameterListenerEventFilter; @@ -96,7 +97,8 @@ public MessageEventHandler unreferencedHousekeepingPathMessageEventHandler( new EventTypeListenerEventFilter(eventClasses), new LocationOnlyUpdateListenerEventFilter(), new TableParameterListenerEventFilter(), - new WhitelistedListenerEventFilter() + new WhitelistedListenerEventFilter(), + new IcebergTableListenerEventFilter() ); return new MessageEventHandler(generator, filters); @@ -120,7 +122,8 @@ public MessageEventHandler expiredHousekeepingMetadataMessageEventHandler( List filters = List.of( new EventTypeListenerEventFilter(eventClasses), - new TableParameterListenerEventFilter() + new TableParameterListenerEventFilter(), + new IcebergTableListenerEventFilter() ); return new MessageEventHandler(generator, filters); diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java index 6cc457fd..0dfad6b7 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -15,61 +15,41 @@ */ package com.expediagroup.beekeeper.scheduler.apiary.filter; -import java.util.Map; - -import 
org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Component; import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; - import com.expediagroup.beekeeper.core.model.LifecycleEventType; -import com.expediagroup.beekeeper.scheduler.service.predicate.IsIcebergTablePredicate; -// Class to intercept and filter events received and determine whether they should be processed or not -@Component +import java.util.Map; + public class IcebergTableListenerEventFilter implements ListenerEventFilter { private static final Logger log = LogManager.getLogger(IcebergTableListenerEventFilter.class); - private final IsIcebergTablePredicate isIcebergTablePredicate; + private static final String TABLE_TYPE_KEY = "table_type"; + private static final String TABLE_TYPE_ICEBERG_VALUE = "ICEBERG"; - @Autowired - public IcebergTableListenerEventFilter(IsIcebergTablePredicate predicate) { - this.isIcebergTablePredicate = predicate; - // inject and assign predicate - } -// check if the table is an iceberg table and log if it is @Override public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { - Table table = createTableFromListenerEvent(event); - if (isIcebergTablePredicate.test(table)) { - log.info("Ignoring Iceberg table '{}.{}'.", event.getDbName(), event.getTableName()); - // Logging when iceberg table is ignored, as per ticket - return true; + Map tableParameters = event.getTableParameters(); + + // Check if the table_type parameter indicates an Iceberg table + if (tableParameters != null) { + String tableType = tableParameters.get(TABLE_TYPE_KEY); + if (TABLE_TYPE_ICEBERG_VALUE.equalsIgnoreCase(tableType)) { + log.info("Ignoring Iceberg table '{}.{}'.", event.getDbName(), event.getTableName()); + return true; + } } 
- return false; - } - // create table from listener event to be used in predicate - private Table createTableFromListenerEvent(ListenerEvent event) { - // create table from listener event - Table table = new Table(); - table.setDbName(event.getDbName()); - table.setTableName(event.getTableName()); - table.setParameters(event.getTableParameters()); - StorageDescriptor sd = new StorageDescriptor(); - Map tableParameters = event.getTableParameters(); // retrieve table params and assigns to map - String outputFormat = ""; // initialize output format - if (tableParameters != null) {// if table parameters are not null - outputFormat = tableParameters.getOrDefault("output_format", "");// get output format from table parameters + // Check if the output_format indicates an Iceberg table + String outputFormat = (tableParameters != null) ? tableParameters.get("output_format") : null; + if (outputFormat != null && outputFormat.toLowerCase().contains("iceberg")) { + log.info("Ignoring Iceberg table '{}.{}'.", event.getDbName(), event.getTableName()); + return true; } - sd.setOutputFormat(outputFormat); // set output format in storafe descriptor - table.setSd(sd); // attach storage descriptor to table - return table; + return false; } } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index b4904ae8..ed9a1825 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2020 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,76 +26,35 @@ import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; -import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; -import com.expedia.apiary.extensions.receiver.common.messaging.MessageEvent; - import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.LifecycleEventType; -import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.SchedulerService; -// class scheduling housekeeping events based on Beekeeper events - -/** - * original flow of the class: - * read beekeperevent → for every housekeepingentity → determine lifecycleeventtype → schedule housekeeping → delete event - * - * updated flow of the class: - * read beekeperevent → extract messageevent → extract listenerevent → determine lifecycleeventtype → - * if iceberg table → ignore & delete event - * else → for every housekeepingentity → determine lifecycleeventtype → schedule housekeeping → delete event - * - */ - @Component public class SchedulerApiary { private final BeekeeperEventReader beekeeperEventReader; private final EnumMap schedulerServiceMap; - private final IcebergTableListenerEventFilter icebergTableListenerEventFilter; @Autowired public SchedulerApiary( BeekeeperEventReader beekeeperEventReader, - EnumMap schedulerServiceMap, - IcebergTableListenerEventFilter icebergTableListenerEventFilter + EnumMap schedulerServiceMap ) { this.beekeeperEventReader = beekeeperEventReader; this.schedulerServiceMap = schedulerServiceMap; - this.icebergTableListenerEventFilter = icebergTableListenerEventFilter; } @Transactional public void scheduleBeekeeperEvent() { - 
Optional beekeeperEventOptional = beekeeperEventReader.read(); - if (beekeeperEventOptional.isEmpty()) { - return; - } - - // extract the messageEvent from beekeeperEvent so we can extract ListenerEvent - // to provide information about the event inc table params to pass to icebergTableListenerEventFilter - BeekeeperEvent beekeeperEvent = beekeeperEventOptional.get(); - MessageEvent messageEvent = beekeeperEvent.getMessageEvent(); - ListenerEvent listenerEvent = messageEvent.getEvent(); - + Optional housekeepingEntitiesToBeScheduled = beekeeperEventReader.read(); + if (housekeepingEntitiesToBeScheduled.isEmpty()) { return; } + BeekeeperEvent beekeeperEvent = housekeepingEntitiesToBeScheduled.get(); List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); - // we didn't check if housekeepingEntities was empty before, might lead to silent failures? - if (housekeepingEntities.isEmpty()) { - throw new BeekeeperException("No housekeeping entities found in the event"); - } - LifecycleEventType lifecycleEventType = LifecycleEventType.valueOf(housekeepingEntities.get(0).getLifecycleType()); - - // apply Iceberg table filter - if (icebergTableListenerEventFilter.isFiltered(listenerEvent, lifecycleEventType)) { - // delete event and skip processing - beekeeperEventReader.delete(beekeeperEvent); - return; - } - for (HousekeepingEntity entity : housekeepingEntities) { try { LifecycleEventType eventType = LifecycleEventType.valueOf(entity.getLifecycleType()); @@ -104,7 +63,8 @@ public void scheduleBeekeeperEvent() { } catch (Exception e) { throw new BeekeeperException(format( "Unable to schedule %s deletion for entity, this message will go back on the queue", - entity.getLifecycleType()), e); + entity.getLifecycleType()), + e); } } @@ -114,4 +74,4 @@ public void scheduleBeekeeperEvent() { public void close() throws IOException { beekeeperEventReader.close(); } -} +} \ No newline at end of file diff --git 
a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java index 29fe13e0..5f8bb958 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java @@ -26,18 +26,14 @@ import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; import com.expediagroup.beekeeper.core.model.LifecycleEventType; -import com.expediagroup.beekeeper.scheduler.service.predicate.IsIcebergTablePredicate; -// class to test the IcebergTableListenerEventFilter public class IcebergTableListenerEventFilterTest { private IcebergTableListenerEventFilter filter; - private IsIcebergTablePredicate predicate; @BeforeEach public void setUp() { - predicate = new IsIcebergTablePredicate(); - filter = new IcebergTableListenerEventFilter(predicate); + filter = new IcebergTableListenerEventFilter(); } @Test @@ -205,7 +201,6 @@ void testIsFilteredTableWithNullParametersAndStorageDescriptor() { assertThat(result).isFalse(); } - // helper method to create a ListenerEvent private ListenerEvent createListenerEvent(String dbName, String tableName, Map tableParameters) { return new ListenerEvent() { @Override diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 2d5dfcbc..d4388bee 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ 
b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2020 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,7 +47,6 @@ import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.LifecycleEventType; -import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.ExpiredHousekeepingMetadataSchedulerService; @@ -62,7 +61,6 @@ public class SchedulerApiaryTest { @Mock private BeekeeperEventReader beekeeperEventReader; @Mock private HousekeepingPath path; @Mock private HousekeepingMetadata table; - @Mock private IcebergTableListenerEventFilter icebergTableListenerEventFilter; private SchedulerApiary scheduler; @@ -71,17 +69,14 @@ public void init() { EnumMap schedulerMap = new EnumMap<>(LifecycleEventType.class); schedulerMap.put(UNREFERENCED, pathSchedulerService); schedulerMap.put(EXPIRED, tableSchedulerService); - scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap, icebergTableListenerEventFilter); + scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap); } @Test public void typicalPathSchedule() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); when(beekeeperEventReader.read()).thenReturn(event); - when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); - scheduler.scheduleBeekeeperEvent(); - verify(pathSchedulerService).scheduleForHousekeeping(path); 
verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -91,34 +86,16 @@ public void typicalPathSchedule() { public void typicalTableSchedule() { Optional event = Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); - when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); - scheduler.scheduleBeekeeperEvent(); - verify(tableSchedulerService).scheduleForHousekeeping(table); verifyNoInteractions(pathSchedulerService); verify(beekeeperEventReader).delete(event.get()); } - @Test - public void icebergTableEventIsFiltered() { - HousekeepingEntity entity = path; // or table, as appropriate - Optional event = Optional.of(newHousekeepingEvent(entity, UNREFERENCED)); - when(beekeeperEventReader.read()).thenReturn(event); - when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(true); - - scheduler.scheduleBeekeeperEvent(); - - verifyNoInteractions(pathSchedulerService); - verifyNoInteractions(tableSchedulerService); - verify(beekeeperEventReader).delete(event.get()); - } - @Test public void typicalNoSchedule() { when(beekeeperEventReader.read()).thenReturn(Optional.empty()); scheduler.scheduleBeekeeperEvent(); - verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader, times(0)).delete(any()); @@ -128,7 +105,6 @@ public void typicalNoSchedule() { public void housekeepingPathRepositoryThrowsException() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); when(beekeeperEventReader.read()).thenReturn(event); - when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); doThrow(new BeekeeperException("exception")).when(pathSchedulerService).scheduleForHousekeeping(path); try { @@ -149,7 +125,6 @@ public void housekeepingPathRepositoryThrowsException() { public void housekeepingTableRepositoryThrowsException() { Optional event = 
Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); - when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); doThrow(new BeekeeperException("exception")).when(tableSchedulerService).scheduleForHousekeeping(table); try { @@ -177,4 +152,4 @@ private BeekeeperEvent newHousekeepingEvent(HousekeepingEntity housekeepingEntit when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); return new BeekeeperEvent(List.of(housekeepingEntity), Mockito.mock(MessageEvent.class)); } -} +} \ No newline at end of file diff --git a/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java b/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java deleted file mode 100644 index a080eab4..00000000 --- a/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Copyright (C) 2019-2024 Expedia, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.expediagroup.beekeeper.scheduler.service.predicate; - -import static org.apache.commons.lang3.StringUtils.containsIgnoreCase; -import static org.apache.commons.lang3.StringUtils.equalsIgnoreCase; - -import java.util.Map; -import java.util.function.Predicate; - -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.springframework.stereotype.Component; - -import lombok.NonNull; - -// class to determine if a table is an iceberg table based on `table_type` parameter -// based off IsIcbergTablePredicate class used in Icekeeper -@Component -public class IsIcebergTablePredicate implements Predicate
{ - - private static final String TABLE_TYPE_KEY = "table_type"; - private static final String TABLE_TYPE_ICEBERG_VALUE = "ICEBERG"; - - @Override - public boolean test(@NonNull Table table) { - return (hasSdProperty(table) || hasTableParameter(table)); - } -// check if the table has the output format property set to iceberg - private boolean hasSdProperty(Table table) { - StorageDescriptor sd = table.getSd(); - if (sd != null) { - String tableOutputFormat = sd.getOutputFormat(); - return containsIgnoreCase(tableOutputFormat, "iceberg"); - } - return false; - } -//retrieve the table parameters and check if the table type is ICEBERG - private boolean hasTableParameter(Table table) { - Map parameters = table.getParameters(); - if (parameters != null) { - String tableType = table.getParameters().get(TABLE_TYPE_KEY); - return equalsIgnoreCase(TABLE_TYPE_ICEBERG_VALUE, tableType); - } - return false; - } -} diff --git a/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java b/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java deleted file mode 100644 index bf0f7e77..00000000 --- a/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java +++ /dev/null @@ -1,130 +0,0 @@ -/** - * Copyright (C) 2019-2024 Expedia, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.expediagroup.beekeeper.scheduler.service.predicate; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.junit.jupiter.api.Test; - -class IsIcebergTablePredicateTest { - - private IsIcebergTablePredicate predicate = new IsIcebergTablePredicate(); - - @Test - void testIsIcebergTableByTableType() { - Table table = new Table(); - Map tableParameters = new HashMap<>(); - tableParameters.put("table_type", "ICEBERG"); - table.setParameters(tableParameters); - - assertThat(predicate.test(table)).isTrue(); - } - - @Test - void testIsIcebergTableByOutputFormat() { - Table table = new Table(); - StorageDescriptor sd = new StorageDescriptor(); - sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); - table.setSd(sd); - - assertThat(predicate.test(table)).isTrue(); - } - - @Test - void testIsIcebergTableByBoth() { - Table table = new Table(); - Map tableParameters = new HashMap<>(); - tableParameters.put("table_type", "ICEBERG"); - table.setParameters(tableParameters); - StorageDescriptor sd = new StorageDescriptor(); - sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); - table.setSd(sd); - - assertThat(predicate.test(table)).isTrue(); - } - - @Test - void testIsNotIcebergTableWithDifferentTableTypeAndOutputFormat() { - Table table = new Table(); - Map tableParameters = new HashMap<>(); - tableParameters.put("table_type", "NICEBERG"); - table.setParameters(tableParameters); - StorageDescriptor sd = new StorageDescriptor(); - sd.setOutputFormat("org.apache.not.an.ice.berg.table"); - table.setSd(sd); - - assertThat(predicate.test(table)).isFalse(); - } - - @Test - void testIsNotIcebergTableWithWrongParameter() { - Table table = new Table(); 
- table.setParameters(Collections.singletonMap("table_type", "NICEBERG")); - - assertThat(predicate.test(table)).isFalse(); - } - - @Test - void testIsNotIcebergTableWithWrongStorageDescriptor() { - Table table = new Table(); - StorageDescriptor sd = new StorageDescriptor(); - sd.setOutputFormat("org.apache.not.an.ice.berg.table"); - table.setSd(sd); - - assertThat(predicate.test(table)).isFalse(); - } - - @Test - void testIsNotIcebergTableWithNoParametersOrSd() { - Table table = new Table(); - - assertThat(predicate.test(table)).isFalse(); - } - - @Test - void testIsIcebergTableWithStorageDescriptorButDifferentTableType() { - Table table = new Table(); - StorageDescriptor sd = new StorageDescriptor(); - sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); - table.setSd(sd); - table.setParameters(Collections.singletonMap("table_type", "NICEBERG")); - - assertThat(predicate.test(table)).isTrue(); - } - - @Test - void testIsIcebergTableWithTableTypeButDifferentSd() { - Table table = new Table(); - StorageDescriptor sd = new StorageDescriptor(); - sd.setOutputFormat("org.apache.not.an.ice.berg.table"); - table.setSd(sd); - table.setParameters(Collections.singletonMap("table_type", "ICEBERG")); - - assertThat(predicate.test(table)).isTrue(); - } - - @Test - void testNullTableThrowsException() { - assertThrows(NullPointerException.class, () -> predicate.test(null)); - } -} From c661511da608d60edc94cd7bfafb364d6556fe7a Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 11 Nov 2024 16:30:47 +0000 Subject: [PATCH 4/8] revert --- ...eekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java | 4 ++-- ...ekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java | 2 +- beekeeper-scheduler/pom.xml | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java 
b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index 5d93f275..625dbcd1 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2023 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -265,4 +265,4 @@ public void assertMetrics() { assertThat(meters).extracting("id", Meter.Id.class).extracting("name").contains(SCHEDULED_EXPIRED_METRIC); }); } -} \ No newline at end of file +} diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index 2cd0f1e5..3bc063d1 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2023 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/beekeeper-scheduler/pom.xml b/beekeeper-scheduler/pom.xml index e924ab69..cc2f7d9b 100644 --- a/beekeeper-scheduler/pom.xml +++ b/beekeeper-scheduler/pom.xml @@ -90,3 +90,4 @@ + From f60ac1519c395d8d3058ec4a136cc56acb2909fc Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 11 Nov 2024 16:33:01 +0000 Subject: [PATCH 5/8] revert --- .../integration/model/SqsMessage.java | 3 +- .../integration/model/SqsMessageTest.java | 5 +- .../apiary/service/SchedulerApiaryTest.java | 3 +- beekeeper-scheduler/pom.xml | 62 +------------------ 4 files changed, 8 insertions(+), 65 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java index b9dea7ff..7bc2dfe1 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java @@ -107,4 +107,5 @@ public final String getFormattedString() { public JsonObject getApiaryEventMessageJsonObject() { return apiaryEventMessageJsonObject; } -} \ No newline at end of file +} + diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java index f624f0f3..e47e4563 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2020 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -107,4 +107,5 @@ private void assertKeys(SqsMessage sqsMessage, Set specificKeys, String assertThat(object.get("eventType").getAsString()).isEqualTo(eventType); assertThat(object.keySet()).isEqualTo(mergedSet); } -} \ No newline at end of file +} + diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index d4388bee..760d805f 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -152,4 +152,5 @@ private BeekeeperEvent newHousekeepingEvent(HousekeepingEntity housekeepingEntit when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); return new BeekeeperEvent(List.of(housekeepingEntity), Mockito.mock(MessageEvent.class)); } -} \ No newline at end of file +} + diff --git a/beekeeper-scheduler/pom.xml b/beekeeper-scheduler/pom.xml index cc2f7d9b..bec042ea 100644 --- a/beekeeper-scheduler/pom.xml +++ b/beekeeper-scheduler/pom.xml @@ -10,11 +10,6 @@ beekeeper-scheduler - - 2.3.7 - 1.4.2 - - com.expediagroup @@ -22,54 +17,6 @@ ${project.version} - - - org.apache.hive - hive-metastore - ${hive.version} - - - org.apache.hbase - hbase-client - - - org.slf4j - slf4j-log4j12 - - - log4j-slf4j-impl - org.apache.logging.log4j - - - junit - junit - - - org.eclipse.jetty.aggregate - jetty-all - - - org.eclipse.jetty.orbit - javax.servlet - - - javax.servlet - servlet-api - - - - - com.hotels - hcommon-hive-metastore - ${hcommon-hive-metastore.version} - - - net.java.dev.jna - jna - - - - ch.qos.logback logback-core @@ -82,12 +29,5 @@ org.springframework.boot spring-boot-starter-web - - org.projectlombok - lombok - ${lombok.version} - provided - - - + \ No 
newline at end of file From 7eb19dbd6a6b709bc5eb0288a35309d96166c38c Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 12 Nov 2024 12:56:45 +0000 Subject: [PATCH 6/8] Revert "revert" This reverts commit f60ac1519c395d8d3058ec4a136cc56acb2909fc. --- .../integration/model/SqsMessage.java | 3 +- .../integration/model/SqsMessageTest.java | 5 +- .../apiary/service/SchedulerApiaryTest.java | 3 +- beekeeper-scheduler/pom.xml | 62 ++++++++++++++++++- 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java index 7bc2dfe1..b9dea7ff 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java @@ -107,5 +107,4 @@ public final String getFormattedString() { public JsonObject getApiaryEventMessageJsonObject() { return apiaryEventMessageJsonObject; } -} - +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java index e47e4563..f624f0f3 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -107,5 +107,4 @@ private void assertKeys(SqsMessage sqsMessage, Set specificKeys, String assertThat(object.get("eventType").getAsString()).isEqualTo(eventType); assertThat(object.keySet()).isEqualTo(mergedSet); } -} - +} \ No newline at end of file diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 760d805f..d4388bee 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -152,5 +152,4 @@ private BeekeeperEvent newHousekeepingEvent(HousekeepingEntity housekeepingEntit when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); return new BeekeeperEvent(List.of(housekeepingEntity), Mockito.mock(MessageEvent.class)); } -} - +} \ No newline at end of file diff --git a/beekeeper-scheduler/pom.xml b/beekeeper-scheduler/pom.xml index bec042ea..cc2f7d9b 100644 --- a/beekeeper-scheduler/pom.xml +++ b/beekeeper-scheduler/pom.xml @@ -10,6 +10,11 @@ beekeeper-scheduler + + 2.3.7 + 1.4.2 + + com.expediagroup @@ -17,6 +22,54 @@ ${project.version} + + + org.apache.hive + hive-metastore + ${hive.version} + + + org.apache.hbase + hbase-client + + + org.slf4j + slf4j-log4j12 + + + log4j-slf4j-impl + org.apache.logging.log4j + + + junit + junit + + + org.eclipse.jetty.aggregate + jetty-all + + + org.eclipse.jetty.orbit + javax.servlet + + + javax.servlet + servlet-api + + + + + com.hotels + hcommon-hive-metastore + ${hcommon-hive-metastore.version} + + + net.java.dev.jna + jna + + + + ch.qos.logback logback-core @@ -29,5 +82,12 @@ org.springframework.boot spring-boot-starter-web + + org.projectlombok + lombok + ${lombok.version} + provided + - \ No newline 
at end of file + + From e9e2da8fd85fa34f1e1f331e0632a08d9345189e Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 12 Nov 2024 12:57:03 +0000 Subject: [PATCH 7/8] Revert "revert" This reverts commit c661511da608d60edc94cd7bfafb364d6556fe7a. --- ...eekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java | 4 ++-- ...ekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java | 2 +- beekeeper-scheduler/pom.xml | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index 625dbcd1..5d93f275 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2023 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -265,4 +265,4 @@ public void assertMetrics() { assertThat(meters).extracting("id", Meter.Id.class).extracting("name").contains(SCHEDULED_EXPIRED_METRIC); }); } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index 3bc063d1..2cd0f1e5 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2023 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/beekeeper-scheduler/pom.xml b/beekeeper-scheduler/pom.xml index cc2f7d9b..e924ab69 100644 --- a/beekeeper-scheduler/pom.xml +++ b/beekeeper-scheduler/pom.xml @@ -90,4 +90,3 @@ - From 7ff8ce9100b9eaec2e6cc2608bb56ba7a13c1d84 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 12 Nov 2024 12:57:19 +0000 Subject: [PATCH 8/8] Revert "Undo some changes and update events filter class" This reverts commit aaca3749769538cc8cad0db82d06bb7d53f03bdf. 
--- ...etadataSchedulerApiaryIntegrationTest.java | 76 ++++++++-- ...cedPathSchedulerApiaryIntegrationTest.java | 77 ++++++++--- .../integration/model/SqsMessage.java | 16 ++- .../integration/model/SqsMessageTest.java | 66 +++++++-- .../scheduler/apiary/context/CommonBeans.java | 9 +- .../IcebergTableListenerEventFilter.java | 58 +++++--- .../apiary/service/SchedulerApiary.java | 56 ++++++-- .../IcebergTableListenerEventFilterTest.java | 7 +- .../apiary/service/SchedulerApiaryTest.java | 31 ++++- .../predicate/IsIcebergTablePredicate.java | 60 ++++++++ .../IsIcebergTablePredicateTest.java | 130 ++++++++++++++++++ 11 files changed, 502 insertions(+), 84 deletions(-) create mode 100644 beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java create mode 100644 beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index 5d93f275..e39387bd 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2023 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -126,7 +126,8 @@ public void expiredMetadataCreateTableEvent() throws SQLException, IOException, await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null, true); + // assertMetrics() accepts a boolean value now so we can verify if metadata-scheduled is not present } @Test @@ -139,7 +140,7 @@ public void expiredMetadataAlterTableEvent() throws SQLException, IOException, U await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, null, true); } @Test @@ -156,7 +157,7 @@ public void expiredMetadataAddPartitionEvent() throws SQLException, IOException, List expiredMetadata = getExpiredMetadata(); // check first entry is for the table assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME, true); } @Test @@ -176,8 +177,8 @@ public void expiredMetadataMultipleAddPartitionEvents() throws SQLException, IOE List expiredMetadata = getExpiredMetadata(); // check first entry is for the table assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME); - assertExpiredMetadata(expiredMetadata.get(2), LOCATION_B, PARTITION_B_NAME); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME, true); + assertExpiredMetadata(expiredMetadata.get(2), LOCATION_B, PARTITION_B_NAME, true); } @Test @@ -191,7 +192,7 @@ public void expiredMetadataAlterPartitionTableEvent() throws SQLException, IOExc 
await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME, true); } @Test @@ -209,8 +210,28 @@ public void expiredMetadataMultipleAlterPartitionTableEvents() throws SQLExcepti await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 2); List expiredMetadata = getExpiredMetadata(); - assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME); - assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, PARTITION_B_NAME); + assertExpiredMetadata(expiredMetadata.get(0), LOCATION_A, PARTITION_A_NAME, true); + assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, PARTITION_B_NAME, true); + } + + // New test to check if expired metadata for Iceberg tables is filtered + @Test + public void expiredMetadataIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { + //create a message for an Iceberg table by including table_type=ICEBERG in the payload + CreateTableSqsMessage createIcebergTableSqsMessage = new CreateTableSqsMessage(LOCATION_A, true); + createIcebergTableSqsMessage.setTableType("ICEBERG"); + createIcebergTableSqsMessage.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + amazonSQS.sendMessage(sendMessageRequest(createIcebergTableSqsMessage.getFormattedString())); + // wait for SchedulerApiary to process message + await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 0); + // asserts that no expired metadata was scheduled + List expiredMetadata = getExpiredMetadata(); + assertThat(expiredMetadata).isEmpty(); + // verify metrics (updated assertMetrics) below + assertMetrics(false); + // assert the event was deleted from the queue + int queueSize = getSqsQueueSize(); + 
assertThat(queueSize).isEqualTo(0); } @Test @@ -233,9 +254,9 @@ private SendMessageRequest sendMessageRequest(String payload) { return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); } - private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName) { + private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName, boolean expectScheduledExpiredMetric) { assertHousekeepingMetadata(actual, expectedPath, partitionName); - assertMetrics(); + assertMetrics(expectScheduledExpiredMetric); } public void assertHousekeepingMetadata( @@ -256,13 +277,40 @@ public void assertHousekeepingMetadata( assertThat(actual.getLifecycleType()).isEqualTo(EXPIRED.toString()); } - public void assertMetrics() { + /** + * Previously, assertExpiredMetadata didn't differentiate between whether specific metrics (e.g., metadata-scheduled) were expected to be present or not + * Adding boolean param allows us to check if SCHEDULED_EXPIRED_METRIC exists + * This allows me to check if the metadata-scheduled is present in expiredMetadataIcebergTableEventIsFiltered test. 
+ */ + public void assertMetrics(boolean expectScheduledExpiredMetric) { Set meterRegistry = ((CompositeMeterRegistry) BeekeeperSchedulerApiary.meterRegistry()) .getRegistries(); assertThat(meterRegistry).hasSize(2); meterRegistry.forEach(registry -> { List meters = registry.getMeters(); - assertThat(meters).extracting("id", Meter.Id.class).extracting("name").contains(SCHEDULED_EXPIRED_METRIC); + if (expectScheduledExpiredMetric) { + assertThat(meters).extracting("id", Meter.Id.class) + .extracting("name") + .contains(SCHEDULED_EXPIRED_METRIC); + } else { + assertThat(meters).extracting("id", Meter.Id.class) + .extracting("name") + .doesNotContain(SCHEDULED_EXPIRED_METRIC); + } }); } -} \ No newline at end of file + + // retrieves the current number of messages to check if the event has been added to the SQS queue or successfully ignored + private int getSqsQueueSize() { + String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); + // fetch the number of messages + String approximateNumberOfMessages = amazonSQS.getQueueAttributes(queueUrl, List.of("ApproximateNumberOfMessages")) + .getAttributes() + .get("ApproximateNumberOfMessages"); + + //return the count as an integer + return approximateNumberOfMessages != null && !approximateNumberOfMessages.isEmpty() + ? 
Integer.parseInt(approximateNumberOfMessages) + : 0; + } +} diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index 2cd0f1e5..7f2904ad 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2023 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,8 +68,9 @@ @Testcontainers public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase { - - private static final int TIMEOUT = 5; + // changes similar to BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest + private static final int TIMEOUT = 30; + // updated to match BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest, asynchronous operations so 5 seconds might not be enough private static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url"; private static final String QUEUE = "apiary-receiver-queue"; @@ -118,7 +119,7 @@ public void unreferencedAlterTableEvent() throws SQLException, IOException, URIS await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 1); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation", true); } @Test @@ -132,8 +133,8 @@ public void 
unreferencedMultipleAlterTableEvents() throws SQLException, IOExcept await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/oldTableLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation", true); } @Test @@ -149,15 +150,15 @@ public void unreferencedAlterPartitionEvent() throws SQLException, IOException, await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation", true); } @Test public void unreferencedMultipleAlterPartitionEvent() throws IOException, SQLException, URISyntaxException { List .of(new AlterPartitionSqsMessage("s3://bucket/table/expiredTableLocation", - "s3://bucket/table/partitionLocation", "s3://bucket/table/unreferencedPartitionLocation", true, true), + "s3://bucket/table/partitionLocation", "s3://bucket/table/unreferencedPartitionLocation", true, true), new AlterPartitionSqsMessage("s3://bucket/table/expiredTableLocation2", "s3://bucket/table/partitionLocation2", "s3://bucket/table/partitionLocation", true, true)) .forEach(msg -> amazonSQS.sendMessage(sendMessageRequest(msg.getFormattedString()))); @@ -165,8 +166,8 @@ public void unreferencedMultipleAlterPartitionEvent() throws IOException, SQLExc await().atMost(TIMEOUT, 
TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/unreferencedPartitionLocation", true); } @Test @@ -179,8 +180,8 @@ public void unreferencedDropPartitionEvent() throws SQLException, IOException, U await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/partitionLocation2"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/table/partitionLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/table/partitionLocation2", true); } @Test @@ -192,8 +193,25 @@ public void unreferencedDropTableEvent() throws SQLException, IOException, URISy await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 2); List unreferencedPaths = getUnreferencedPaths(); - assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/tableLocation"); - assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation2"); + assertUnreferencedPath(unreferencedPaths.get(0), "s3://bucket/tableLocation", true); + assertUnreferencedPath(unreferencedPaths.get(1), "s3://bucket/tableLocation2", true); + } + + @Test + public void unreferencedIcebergTableEventIsFiltered() throws SQLException, IOException, URISyntaxException { + DropTableSqsMessage dropIcebergTableSqsMessage = new DropTableSqsMessage("s3://bucket/icebergTableLocation", true, true); + 
dropIcebergTableSqsMessage.setTableType("ICEBERG"); + dropIcebergTableSqsMessage.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + amazonSQS.sendMessage(sendMessageRequest(dropIcebergTableSqsMessage.getFormattedString())); + + await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUnreferencedPathsRowCount() == 0); + + List unreferencedPaths = getUnreferencedPaths(); + assertThat(unreferencedPaths).isEmpty(); + assertMetrics(false); + + int queueSize = getSqsQueueSize(); + assertThat(queueSize).isEqualTo(0); } @Test @@ -216,9 +234,9 @@ private SendMessageRequest sendMessageRequest(String payload) { return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); } - private void assertUnreferencedPath(HousekeepingPath actual, String expectedPath) { + private void assertUnreferencedPath(HousekeepingPath actual, String expectedPath, boolean expectScheduledUnreferencedMetric) { assertHousekeepingEntity(actual, expectedPath); - assertMetrics(); + assertMetrics(expectScheduledUnreferencedMetric); } public void assertHousekeepingEntity(HousekeepingPath actual, String expectedPath) { @@ -235,13 +253,32 @@ public void assertHousekeepingEntity(HousekeepingPath actual, String expectedPat assertThat(actual.getLifecycleType()).isEqualTo(UNREFERENCED.toString()); } - public void assertMetrics() { + public void assertMetrics(boolean expectScheduledUnreferencedMetric) { Set meterRegistry = ((CompositeMeterRegistry) BeekeeperSchedulerApiary.meterRegistry()) .getRegistries(); assertThat(meterRegistry).hasSize(2); meterRegistry.forEach(registry -> { List meters = registry.getMeters(); - assertThat(meters).extracting("id", Meter.Id.class).extracting("name").contains(SCHEDULED_ORPHANED_METRIC); + if (expectScheduledUnreferencedMetric) { + assertThat(meters).extracting("id", Meter.Id.class) + .extracting("name") + .contains(SCHEDULED_ORPHANED_METRIC); + } else { + assertThat(meters).extracting("id", Meter.Id.class) + 
.extracting("name") + .doesNotContain(SCHEDULED_ORPHANED_METRIC); + } }); } -} \ No newline at end of file + + private int getSqsQueueSize() { + String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); + String approximateNumberOfMessages = amazonSQS.getQueueAttributes(queueUrl, List.of("ApproximateNumberOfMessages")) + .getAttributes() + .get("ApproximateNumberOfMessages"); + + return approximateNumberOfMessages != null && !approximateNumberOfMessages.isEmpty() + ? Integer.parseInt(approximateNumberOfMessages) + : 0; + } +} diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java index b9dea7ff..cd1dc37e 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,6 +99,18 @@ public void setWhitelisted(boolean isWhitelisted) { tableParameters.add(BEEKEEPER_HIVE_EVENT_WHITELIST, new JsonPrimitive(whitelist)); } + //enable the setting of the table_type parameter in SQS messages, to allow tests to simulate events for Iceberg/non-Iceberg tables. 
+ public void setTableType(String tableType) { + JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); + tableParameters.add("table_type", new JsonPrimitive(tableType)); + } + + // New method to set output_format + public void setOutputFormat(String outputFormat) { + JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); + tableParameters.add("output_format", new JsonPrimitive(outputFormat)); + } + public final String getFormattedString() { apiaryEventJsonObject.add(APIARY_EVENT_MESSAGE_KEY, new JsonPrimitive(apiaryEventMessageJsonObject.toString())); return apiaryEventJsonObject.toString(); @@ -107,4 +119,4 @@ public final String getFormattedString() { public JsonObject getApiaryEventMessageJsonObject() { return apiaryEventMessageJsonObject; } -} \ No newline at end of file +} diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java index f624f0f3..23095658 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessageTest.java @@ -34,8 +34,14 @@ public class SqsMessageTest { - private static final Set COMMON_KEYS = Set.of("protocolVersion", "eventType", "tableParameters", - "dbName", "tableName", "tableLocation"); + private static final Set COMMON_KEYS = Set.of( + "protocolVersion", + "eventType", + "tableParameters", + "dbName", + "tableName", + "tableLocation" + ); @Test public void testCreateTableFormat() throws IOException, URISyntaxException { @@ -50,9 +56,14 @@ public void testAddPartitionFormat() throws IOException, URISyntaxException { "partitionKeys", "partitionValues", "partitionLocation", - "tableParameters"); - 
AddPartitionSqsMessage message = new AddPartitionSqsMessage(DUMMY_LOCATION, DUMMY_PARTITION_KEYS, - DUMMY_PARTITION_VALUES, true); + "tableParameters" + ); + AddPartitionSqsMessage message = new AddPartitionSqsMessage( + DUMMY_LOCATION, + DUMMY_PARTITION_KEYS, + DUMMY_PARTITION_VALUES, + true + ); assertKeys(message, specificKeys, "ADD_PARTITION"); } @@ -63,9 +74,14 @@ public void testAlterPartitionFormat() throws IOException, URISyntaxException { "partitionValues", "partitionLocation", "oldPartitionValues", - "oldPartitionLocation"); - AlterPartitionSqsMessage message = new AlterPartitionSqsMessage(DUMMY_LOCATION, DUMMY_PARTITION_KEYS, - DUMMY_PARTITION_VALUES, true); + "oldPartitionLocation" + ); + AlterPartitionSqsMessage message = new AlterPartitionSqsMessage( + DUMMY_LOCATION, + DUMMY_PARTITION_KEYS, + DUMMY_PARTITION_VALUES, + true + ); assertKeys(message, specificKeys, "ALTER_PARTITION"); } @@ -73,7 +89,8 @@ public void testAlterPartitionFormat() throws IOException, URISyntaxException { public void testAlterTableFormat() throws IOException, URISyntaxException { Set specificKeys = Set.of( "oldTableName", - "oldTableLocation"); + "oldTableLocation" + ); AlterTableSqsMessage message = new AlterTableSqsMessage(DUMMY_LOCATION, true); assertKeys(message, specificKeys, "ALTER_TABLE"); } @@ -84,7 +101,8 @@ public void testDropPartitionFormat() throws IOException, URISyntaxException { "partitionKeys", "partitionValues", "partitionLocation", - "tableParameters"); + "tableParameters" + ); DropPartitionSqsMessage message = new DropPartitionSqsMessage(DUMMY_LOCATION, true, true); assertKeys(message, specificKeys, "DROP_PARTITION"); } @@ -107,4 +125,30 @@ private void assertKeys(SqsMessage sqsMessage, Set specificKeys, String assertThat(object.get("eventType").getAsString()).isEqualTo(eventType); assertThat(object.keySet()).isEqualTo(mergedSet); } -} \ No newline at end of file + + @Test + public void testSetTableType() throws IOException, URISyntaxException { + 
CreateTableSqsMessage message = new CreateTableSqsMessage(DUMMY_LOCATION, true); + + message.setTableType("ICEBERG"); + + JsonObject object = message.getApiaryEventMessageJsonObject(); + JsonObject tableParameters = object.getAsJsonObject("tableParameters"); + + assertThat(tableParameters.get("table_type").getAsString()).isEqualTo("ICEBERG"); + } + + //test method to verify setOutputFormat functionality + @Test + public void testSetOutputFormat() throws IOException, URISyntaxException { + CreateTableSqsMessage message = new CreateTableSqsMessage(DUMMY_LOCATION, true); + + String outputFormatValue = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; + message.setOutputFormat(outputFormatValue); + + JsonObject object = message.getApiaryEventMessageJsonObject(); + JsonObject tableParameters = object.getAsJsonObject("tableParameters"); + + assertThat(tableParameters.get("output_format").getAsString()).isEqualTo(outputFormatValue); + } +} diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java index 74fffbab..492017c6 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2020 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,7 +39,6 @@ import com.expediagroup.beekeeper.core.model.LifecycleEventType; import com.expediagroup.beekeeper.scheduler.apiary.filter.EventTypeListenerEventFilter; -import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.ListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.LocationOnlyUpdateListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.TableParameterListenerEventFilter; @@ -97,8 +96,7 @@ public MessageEventHandler unreferencedHousekeepingPathMessageEventHandler( new EventTypeListenerEventFilter(eventClasses), new LocationOnlyUpdateListenerEventFilter(), new TableParameterListenerEventFilter(), - new WhitelistedListenerEventFilter(), - new IcebergTableListenerEventFilter() + new WhitelistedListenerEventFilter() ); return new MessageEventHandler(generator, filters); @@ -122,8 +120,7 @@ public MessageEventHandler expiredHousekeepingMetadataMessageEventHandler( List filters = List.of( new EventTypeListenerEventFilter(eventClasses), - new TableParameterListenerEventFilter(), - new IcebergTableListenerEventFilter() + new TableParameterListenerEventFilter() ); return new MessageEventHandler(generator, filters); diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java index 0dfad6b7..6cc457fd 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -15,41 +15,61 @@ */ package com.expediagroup.beekeeper.scheduler.apiary.filter; +import java.util.Map; + +import 
org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; -import com.expediagroup.beekeeper.core.model.LifecycleEventType; -import java.util.Map; +import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.service.predicate.IsIcebergTablePredicate; +// Class to intercept and filter events received and determine whether they should be processed or not +@Component public class IcebergTableListenerEventFilter implements ListenerEventFilter { private static final Logger log = LogManager.getLogger(IcebergTableListenerEventFilter.class); - private static final String TABLE_TYPE_KEY = "table_type"; - private static final String TABLE_TYPE_ICEBERG_VALUE = "ICEBERG"; + private final IsIcebergTablePredicate isIcebergTablePredicate; + @Autowired + public IcebergTableListenerEventFilter(IsIcebergTablePredicate predicate) { + this.isIcebergTablePredicate = predicate; + // inject and assign predicate + } +// check if the table is an iceberg table and log if it is @Override public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { - Map tableParameters = event.getTableParameters(); - - // Check if the table_type parameter indicates an Iceberg table - if (tableParameters != null) { - String tableType = tableParameters.get(TABLE_TYPE_KEY); - if (TABLE_TYPE_ICEBERG_VALUE.equalsIgnoreCase(tableType)) { - log.info("Ignoring Iceberg table '{}.{}'.", event.getDbName(), event.getTableName()); - return true; - } - } - - // Check if the output_format indicates an Iceberg table - String outputFormat = (tableParameters != null) ? 
tableParameters.get("output_format") : null; - if (outputFormat != null && outputFormat.toLowerCase().contains("iceberg")) { + Table table = createTableFromListenerEvent(event); + if (isIcebergTablePredicate.test(table)) { log.info("Ignoring Iceberg table '{}.{}'.", event.getDbName(), event.getTableName()); + // Logging when iceberg table is ignored, as per ticket return true; } - return false; } + // create table from listener event to be used in predicate + private Table createTableFromListenerEvent(ListenerEvent event) { + // create table from listener event + Table table = new Table(); + table.setDbName(event.getDbName()); + table.setTableName(event.getTableName()); + table.setParameters(event.getTableParameters()); + StorageDescriptor sd = new StorageDescriptor(); + + Map tableParameters = event.getTableParameters(); // retrieve table params and assigns to map + String outputFormat = ""; // initialize output format + if (tableParameters != null) {// if table parameters are not null + outputFormat = tableParameters.getOrDefault("output_format", "");// get output format from table parameters + } + sd.setOutputFormat(outputFormat); // set output format in storage descriptor + table.setSd(sd); // attach storage descriptor to table + + return table; + } } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index ed9a1825..b4904ae8 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc.
 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,35 +26,76 @@ import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; +import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; +import com.expedia.apiary.extensions.receiver.common.messaging.MessageEvent; + import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.SchedulerService; +// class scheduling housekeeping events based on Beekeeper events + +/** + * original flow of the class: + * read beekeeperevent → for every housekeepingentity → determine lifecycleeventtype → schedule housekeeping → delete event + * + * updated flow of the class: + * read beekeeperevent → extract messageevent → extract listenerevent → determine lifecycleeventtype → + * if iceberg table → ignore & delete event + * else → for every housekeepingentity → determine lifecycleeventtype → schedule housekeeping → delete event + * + */ + @Component public class SchedulerApiary { private final BeekeeperEventReader beekeeperEventReader; private final EnumMap schedulerServiceMap; + private final IcebergTableListenerEventFilter icebergTableListenerEventFilter; @Autowired public SchedulerApiary( BeekeeperEventReader beekeeperEventReader, - EnumMap schedulerServiceMap + EnumMap schedulerServiceMap, + IcebergTableListenerEventFilter icebergTableListenerEventFilter ) { this.beekeeperEventReader = beekeeperEventReader; this.schedulerServiceMap = 
schedulerServiceMap; + this.icebergTableListenerEventFilter = icebergTableListenerEventFilter; } @Transactional public void scheduleBeekeeperEvent() { - Optional housekeepingEntitiesToBeScheduled = beekeeperEventReader.read(); - if (housekeepingEntitiesToBeScheduled.isEmpty()) { return; } - BeekeeperEvent beekeeperEvent = housekeepingEntitiesToBeScheduled.get(); + Optional beekeeperEventOptional = beekeeperEventReader.read(); + if (beekeeperEventOptional.isEmpty()) { + return; + } + + // extract the messageEvent from beekeeperEvent so we can extract ListenerEvent + // to provide information about the event inc table params to pass to icebergTableListenerEventFilter + BeekeeperEvent beekeeperEvent = beekeeperEventOptional.get(); + MessageEvent messageEvent = beekeeperEvent.getMessageEvent(); + ListenerEvent listenerEvent = messageEvent.getEvent(); + List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); + // we didn't check if housekeepingEntities was empty before, might lead to silent failures? 
+ if (housekeepingEntities.isEmpty()) { + throw new BeekeeperException("No housekeeping entities found in the event"); + } + LifecycleEventType lifecycleEventType = LifecycleEventType.valueOf(housekeepingEntities.get(0).getLifecycleType()); + + // apply Iceberg table filter + if (icebergTableListenerEventFilter.isFiltered(listenerEvent, lifecycleEventType)) { + // delete event and skip processing + beekeeperEventReader.delete(beekeeperEvent); + return; + } + for (HousekeepingEntity entity : housekeepingEntities) { try { LifecycleEventType eventType = LifecycleEventType.valueOf(entity.getLifecycleType()); @@ -63,8 +104,7 @@ public void scheduleBeekeeperEvent() { } catch (Exception e) { throw new BeekeeperException(format( "Unable to schedule %s deletion for entity, this message will go back on the queue", - entity.getLifecycleType()), - e); + entity.getLifecycleType()), e); } } @@ -74,4 +114,4 @@ public void scheduleBeekeeperEvent() { public void close() throws IOException { beekeeperEventReader.close(); } -} \ No newline at end of file +} diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java index 5f8bb958..29fe13e0 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java @@ -26,14 +26,18 @@ import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.service.predicate.IsIcebergTablePredicate; +// class to test the IcebergTableListenerEventFilter public class IcebergTableListenerEventFilterTest { 
private IcebergTableListenerEventFilter filter; + private IsIcebergTablePredicate predicate; @BeforeEach public void setUp() { - filter = new IcebergTableListenerEventFilter(); + predicate = new IsIcebergTablePredicate(); + filter = new IcebergTableListenerEventFilter(predicate); } @Test @@ -201,6 +205,7 @@ void testIsFilteredTableWithNullParametersAndStorageDescriptor() { assertThat(result).isFalse(); } + // helper method to create a ListenerEvent private ListenerEvent createListenerEvent(String dbName, String tableName, Map tableParameters) { return new ListenerEvent() { @Override diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index d4388bee..2d5dfcbc 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,6 +47,7 @@ import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.ExpiredHousekeepingMetadataSchedulerService; @@ -61,6 +62,7 @@ public class SchedulerApiaryTest { @Mock private BeekeeperEventReader beekeeperEventReader; @Mock private HousekeepingPath path; @Mock private HousekeepingMetadata table; + @Mock private IcebergTableListenerEventFilter icebergTableListenerEventFilter; private SchedulerApiary scheduler; @@ -69,14 +71,17 @@ public void init() { EnumMap schedulerMap = new EnumMap<>(LifecycleEventType.class); schedulerMap.put(UNREFERENCED, pathSchedulerService); schedulerMap.put(EXPIRED, tableSchedulerService); - scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap); + scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap, icebergTableListenerEventFilter); } @Test public void typicalPathSchedule() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); + scheduler.scheduleBeekeeperEvent(); + verify(pathSchedulerService).scheduleForHousekeeping(path); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -86,16 +91,34 @@ public void typicalPathSchedule() { public void typicalTableSchedule() { Optional event = Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); + 
scheduler.scheduleBeekeeperEvent(); + verify(tableSchedulerService).scheduleForHousekeeping(table); verifyNoInteractions(pathSchedulerService); verify(beekeeperEventReader).delete(event.get()); } + @Test + public void icebergTableEventIsFiltered() { + HousekeepingEntity entity = path; // or table, as appropriate + Optional event = Optional.of(newHousekeepingEvent(entity, UNREFERENCED)); + when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(true); + + scheduler.scheduleBeekeeperEvent(); + + verifyNoInteractions(pathSchedulerService); + verifyNoInteractions(tableSchedulerService); + verify(beekeeperEventReader).delete(event.get()); + } + @Test public void typicalNoSchedule() { when(beekeeperEventReader.read()).thenReturn(Optional.empty()); scheduler.scheduleBeekeeperEvent(); + verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader, times(0)).delete(any()); @@ -105,6 +128,7 @@ public void typicalNoSchedule() { public void housekeepingPathRepositoryThrowsException() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); doThrow(new BeekeeperException("exception")).when(pathSchedulerService).scheduleForHousekeeping(path); try { @@ -125,6 +149,7 @@ public void housekeepingPathRepositoryThrowsException() { public void housekeepingTableRepositoryThrowsException() { Optional event = Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); + when(icebergTableListenerEventFilter.isFiltered(any(), any())).thenReturn(false); doThrow(new BeekeeperException("exception")).when(tableSchedulerService).scheduleForHousekeeping(table); try { @@ -152,4 +177,4 @@ private BeekeeperEvent newHousekeepingEvent(HousekeepingEntity housekeepingEntit 
when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); return new BeekeeperEvent(List.of(housekeepingEntity), Mockito.mock(MessageEvent.class)); } -} \ No newline at end of file +} diff --git a/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java b/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java new file mode 100644 index 00000000..a080eab4 --- /dev/null +++ b/beekeeper-scheduler/src/main/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicate.java @@ -0,0 +1,60 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.expediagroup.beekeeper.scheduler.service.predicate; + +import static org.apache.commons.lang3.StringUtils.containsIgnoreCase; +import static org.apache.commons.lang3.StringUtils.equalsIgnoreCase; + +import java.util.Map; +import java.util.function.Predicate; + +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.springframework.stereotype.Component; + +import lombok.NonNull; + +// class to determine if a table is an iceberg table based on its `table_type` parameter or its output format +// based off IsIcebergTablePredicate class used in Icekeeper +@Component +public class IsIcebergTablePredicate implements Predicate<Table>
{ + + private static final String TABLE_TYPE_KEY = "table_type"; + private static final String TABLE_TYPE_ICEBERG_VALUE = "ICEBERG"; + + @Override + public boolean test(@NonNull Table table) { + return (hasSdProperty(table) || hasTableParameter(table)); + } +// check if the storage descriptor's output format contains "iceberg" (case-insensitive) + private boolean hasSdProperty(Table table) { + StorageDescriptor sd = table.getSd(); + if (sd != null) { + String tableOutputFormat = sd.getOutputFormat(); + return containsIgnoreCase(tableOutputFormat, "iceberg"); + } + return false; + } +// retrieve the table parameters and check if the table type is ICEBERG (case-insensitive) + private boolean hasTableParameter(Table table) { + Map<String, String> parameters = table.getParameters(); + if (parameters != null) { + String tableType = table.getParameters().get(TABLE_TYPE_KEY); + return equalsIgnoreCase(TABLE_TYPE_ICEBERG_VALUE, tableType); + } + return false; + } +} diff --git a/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java b/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java new file mode 100644 index 00000000..bf0f7e77 --- /dev/null +++ b/beekeeper-scheduler/src/test/java/com/expediagroup/beekeeper/scheduler/service/predicate/IsIcebergTablePredicateTest.java @@ -0,0 +1,130 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.expediagroup.beekeeper.scheduler.service.predicate; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.jupiter.api.Test; + +class IsIcebergTablePredicateTest { + + private IsIcebergTablePredicate predicate = new IsIcebergTablePredicate(); + + @Test + void testIsIcebergTableByTableType() { + Table table = new Table(); + Map<String, String> tableParameters = new HashMap<>(); + tableParameters.put("table_type", "ICEBERG"); + table.setParameters(tableParameters); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testIsIcebergTableByOutputFormat() { + Table table = new Table(); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + table.setSd(sd); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testIsIcebergTableByBoth() { + Table table = new Table(); + Map<String, String> tableParameters = new HashMap<>(); + tableParameters.put("table_type", "ICEBERG"); + table.setParameters(tableParameters); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + table.setSd(sd); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testIsNotIcebergTableWithDifferentTableTypeAndOutputFormat() { + Table table = new Table(); + Map<String, String> tableParameters = new HashMap<>(); + tableParameters.put("table_type", "NICEBERG"); + table.setParameters(tableParameters); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.not.an.ice.berg.table"); + table.setSd(sd); + + assertThat(predicate.test(table)).isFalse(); + } + + @Test + void testIsNotIcebergTableWithWrongParameter() { + Table table = new Table();
+ table.setParameters(Collections.singletonMap("table_type", "NICEBERG")); + + assertThat(predicate.test(table)).isFalse(); + } + + @Test + void testIsNotIcebergTableWithWrongStorageDescriptor() { + Table table = new Table(); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.not.an.ice.berg.table"); + table.setSd(sd); + + assertThat(predicate.test(table)).isFalse(); + } + + @Test + void testIsNotIcebergTableWithNoParametersOrSd() { + Table table = new Table(); + + assertThat(predicate.test(table)).isFalse(); + } + + @Test + void testIsIcebergTableWithStorageDescriptorButDifferentTableType() { + Table table = new Table(); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + table.setSd(sd); + table.setParameters(Collections.singletonMap("table_type", "NICEBERG")); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testIsIcebergTableWithTableTypeButDifferentSd() { + Table table = new Table(); + StorageDescriptor sd = new StorageDescriptor(); + sd.setOutputFormat("org.apache.not.an.ice.berg.table"); + table.setSd(sd); + table.setParameters(Collections.singletonMap("table_type", "ICEBERG")); + + assertThat(predicate.test(table)).isTrue(); + } + + @Test + void testNullTableThrowsException() { + assertThrows(NullPointerException.class, () -> predicate.test(null)); + } +}