diff --git a/CHANGELOG.md b/CHANGELOG.md index 36a59cba00d..1668ec92a28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ as necessary. Empty sections will not end in the release notes. ### New Features +- Add functionality to the Nessie server admin tool, the `cleanup-repository` command, to delete + unneeded objects from a Nessie repository (backend database). + ### Changes - The persistence cache tries to avoid deserialization overhead when getting an object from the diff --git a/site/in-dev/export_import.md b/site/in-dev/export_import.md index 6863d76b49e..9de8d3bc375 100644 --- a/site/in-dev/export_import.md +++ b/site/in-dev/export_import.md @@ -223,6 +223,10 @@ Below is the output of the Nessie Server Admin tool help for all commands. {% include './generated-docs/serverAdmin-help-delete-catalog-tasks.md' %} +### `cleanup-repository` + +{% include './generated-docs/serverAdmin-help-cleanup-repository.md' %} + ### `erase-repository` {% include './generated-docs/serverAdmin-help-erase-repository.md' %} diff --git a/tools/doc-generator/site-gen/build.gradle.kts b/tools/doc-generator/site-gen/build.gradle.kts index c6374556b23..e414ceaabeb 100644 --- a/tools/doc-generator/site-gen/build.gradle.kts +++ b/tools/doc-generator/site-gen/build.gradle.kts @@ -242,6 +242,7 @@ val serverAdminHelp by tasks.registering(JavaExec::class) { "info", "check-content", "delete-catalog-tasks", + "cleanup-repository", "erase-repository", "export", "import", diff --git a/tools/server-admin/build.gradle.kts b/tools/server-admin/build.gradle.kts index 3041d1e0fd7..051d55668b0 100644 --- a/tools/server-admin/build.gradle.kts +++ b/tools/server-admin/build.gradle.kts @@ -49,6 +49,7 @@ dependencies { implementation(project(":nessie-versioned-storage-cache")) implementation(project(":nessie-versioned-storage-cassandra")) implementation(project(":nessie-versioned-storage-cassandra2")) + implementation(project(":nessie-versioned-storage-cleanup")) implementation(project(":nessie-versioned-storage-common")) implementation(project(":nessie-versioned-storage-dynamodb")) implementation(project(":nessie-versioned-storage-dynamodb2")) @@ -187,7 +188,7 @@ if (Os.isFamily(Os.FAMILY_MAC) && System.getenv("CI") != null) { tasks.named("intTest").configure { // Reduce likelihood of OOM due to too many Quarkus apps in memory; // Ideally, set this number to the number of IT classes to run for each backend. - forkEvery = 5 + forkEvery = 6 // Optional; comma-separated list of backend names to test against; // see NessieServerAdminTestBackends for valid values. systemProperty("backends", System.getProperty("backends")) diff --git a/tools/server-admin/src/intTest/java/org/projectnessie/tools/admin/cli/ITCleanupRepository.java b/tools/server-admin/src/intTest/java/org/projectnessie/tools/admin/cli/ITCleanupRepository.java new file mode 100644 index 00000000000..a407bba417b --- /dev/null +++ b/tools/server-admin/src/intTest/java/org/projectnessie/tools/admin/cli/ITCleanupRepository.java @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2024 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.admin.cli; + +import io.quarkus.test.junit.TestProfile; +import io.quarkus.test.junit.main.QuarkusMainLauncher; +import io.quarkus.test.junit.main.QuarkusMainTest; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.projectnessie.quarkus.tests.profiles.BaseConfigProfile; +import org.projectnessie.versioned.storage.common.persist.Persist; + +@QuarkusMainTest +@TestProfile(BaseConfigProfile.class) +@ExtendWith({NessieServerAdminTestExtension.class, SoftAssertionsExtension.class}) +class ITCleanupRepository extends AbstractContentTests { + + @InjectSoftAssertions private SoftAssertions soft; + + ITCleanupRepository(Persist persist) { + super(persist, Object.class); + } + + @Test + public void testCleanup(QuarkusMainLauncher launcher) { + var launchResult = launcher.launch("cleanup-repository"); + soft.assertThat(launchResult.exitCode()).isEqualTo(0); + soft.assertThat(launchResult.getOutputStream()) + .contains( + "Identifying referenced objects, processing unlimited commits per second, processing unlimited objects per second, expecting max 1000000 objects, estimated context heap pressure: 18.396 M") + .anyMatch( + s -> + s.matches( + "Finished identifying referenced objects after PT.*. Processed 3 references, 3 commits, 2 objects, 0 contents.")) + .anyMatch( + s -> + s.matches( + "Purging unreferenced objects, referenced before .*, scanning unlimited objects per second, deleting unlimited objects per second, estimated context heap pressure: 5.713 M")) + .anyMatch( + s -> + s.matches( + "Finished purging unreferenced objects after PT.*. Scanned 5 objects, 0 were deleted.")); + } + + @Test + public void testCleanupParams(QuarkusMainLauncher launcher) { + var launchResult = + launcher.launch( + "cleanup-repository", + CleanupRepository.DRY_RUN, + CleanupRepository.COMMIT_RATE, + "11", + CleanupRepository.OBJ_RATE, + "12", + CleanupRepository.SCAN_OBJ_RATE, + "13", + CleanupRepository.PURGE_OBJ_RATE, + "14", + CleanupRepository.REFERENCED_GRACE, + "P10D", + CleanupRepository.OBJ_COUNT, + "1000"); + soft.assertThat(launchResult.exitCode()).isEqualTo(0); + soft.assertThat(launchResult.getOutputStream()) + .contains( + "Identifying referenced objects, processing 11 commits per second, processing 12 objects per second, expecting max 1000 objects, estimated context heap pressure: 12.688 M") + .anyMatch( + s -> + s.matches( + "Finished identifying referenced objects after PT.*. Processed 3 references, 3 commits, 2 objects, 0 contents.")) + .anyMatch( + s -> + s.matches( + "Dry-run cleanup unreferenced objects, referenced before .*, scanning 13 objects per second, deleting 14 objects per second, estimated context heap pressure: 0.006 M")) + .anyMatch( + s -> + s.matches( + "Finished purging unreferenced objects after PT.*. Scanned 5 objects, 0 were deleted.")); + } +} diff --git a/tools/server-admin/src/main/java/org/projectnessie/tools/admin/cli/CleanupRepository.java b/tools/server-admin/src/main/java/org/projectnessie/tools/admin/cli/CleanupRepository.java new file mode 100644 index 00000000000..85bcbb2d9eb --- /dev/null +++ b/tools/server-admin/src/main/java/org/projectnessie/tools/admin/cli/CleanupRepository.java @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2024 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.admin.cli; + +import static java.time.Instant.ofEpochMilli; +import static java.time.temporal.ChronoUnit.MICROS; +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static org.projectnessie.versioned.storage.cleanup.Cleanup.createCleanup; +import static org.projectnessie.versioned.storage.cleanup.CleanupParams.DEFAULT_ALLOWED_FALSE_POSITIVE_PROBABILITY; +import static org.projectnessie.versioned.storage.cleanup.CleanupParams.DEFAULT_EXPECTED_OBJ_COUNT; +import static org.projectnessie.versioned.storage.cleanup.CleanupParams.DEFAULT_FALSE_POSITIVE_PROBABILITY; +import static org.projectnessie.versioned.storage.cleanup.CleanupParams.DEFAULT_RECENT_OBJ_IDS_FILTER_SIZE; +import static org.projectnessie.versioned.storage.common.logic.Logics.repositoryLogic; + +import java.time.Duration; +import org.projectnessie.versioned.storage.cleanup.CleanupParams; +import org.projectnessie.versioned.storage.cleanup.MustRestartWithBiggerFilterException; +import org.projectnessie.versioned.storage.cleanup.ResolveResult; +import picocli.CommandLine; + +@CommandLine.Command( + name = "cleanup-repository", + mixinStandardHelpOptions = true, + description = { + "Cleanup unreferenced data from Nessie's repository.", + "This is a two-phase implementation that first identifies the objects that are referenced and the second phase scans the whole repository and deletes objects that are unreferenced.", + "It is recommended to run this command regularly, but with appropriate rate limits using the " + + CleanupRepository.COMMIT_RATE + + ", " + + CleanupRepository.OBJ_RATE + + ", " + + CleanupRepository.SCAN_OBJ_RATE + + ", " + + CleanupRepository.PURGE_OBJ_RATE + + " which does not overload your backend database system.", + "The implementation uses a bloom-filter to identify the IDs of referenced objects. The default setting is to allow for " + + DEFAULT_EXPECTED_OBJ_COUNT + + " objects in the backend database with an FPP of " + + DEFAULT_FALSE_POSITIVE_PROBABILITY + + ". These values should serve most repositories. However, if your repository is quite big, you should supply a higher expected object count using the " + + CleanupRepository.OBJ_COUNT + + " option. If the implementation detected that the bloom-filter would exceed the maximum allowed FPP, it would restart with a higher number of expected objects.", + "In rare situations with an extremely huge amount of objects, the data structures may require a lot of memory. The estimated heap pressure for the contextual data structures is printed to the console.", + "If you are unsure whether this command works fine, specify the " + + CleanupRepository.DRY_RUN + + " option to perform all operations except deleting objects." + }) +public class CleanupRepository extends BaseCommand { + + public static final String OBJ_COUNT = "--obj-count"; + public static final String COMMIT_RATE = "--commit-rate"; + public static final String OBJ_RATE = "--obj-rate"; + public static final String SCAN_OBJ_RATE = "--scan-obj-rate"; + public static final String PURGE_OBJ_RATE = "--purge-obj-rate"; + public static final String DRY_RUN = "--dry-run"; + public static final String REFERENCED_GRACE = "--referenced-grace"; + + @CommandLine.Option( + names = {DRY_RUN}, + description = "Perform all operations, but do not delete any object .") + private boolean dryRun; + + @CommandLine.Option( + names = {REFERENCED_GRACE}, + description = + "Grace-time for newly created objects to not be deleted. Default is just \"now\". Specified using the ISO-8601 format, for example P1D (24 hours) or PT2H (2 hours) or P10D12H (10 * 24 + 10 hours).") + private Duration objReferencedGrace; + + @CommandLine.Option( + names = {OBJ_COUNT}, + description = "Number of expected objects, defaults to " + DEFAULT_EXPECTED_OBJ_COUNT + '.') + private long expectedObjCount = DEFAULT_EXPECTED_OBJ_COUNT; + + @CommandLine.Option( + names = {"--fpp"}, + description = + "Default false-positive-probability to detect referenced objects, defaults to " + + DEFAULT_FALSE_POSITIVE_PROBABILITY + + '.') + private double falsePositiveProbability = DEFAULT_FALSE_POSITIVE_PROBABILITY; + + @CommandLine.Option( + names = {"--allowed-fpp"}, + description = + "Maximum allowed false-positive-probability to detect referenced objects, defaults to " + + DEFAULT_ALLOWED_FALSE_POSITIVE_PROBABILITY + + '.') + private double allowedFalsePositiveProbability = DEFAULT_ALLOWED_FALSE_POSITIVE_PROBABILITY; + + @CommandLine.Option( + names = {COMMIT_RATE}, + description = + "Allowed number of commits to process during the 'resolve' phase per second. Default is unlimited.") + private int resolveCommitRatePerSecond = 0; + + @CommandLine.Option( + names = {OBJ_RATE}, + description = + "Allowed number of objects to process during the 'resolve' phase per second. Default is unlimited.") + private int resolveObjRatePerSecond = 0; + + @CommandLine.Option( + names = {"--pending-objs-batch-size"}, + description = "") + private int pendingObjsBatchSize = CleanupParams.DEFAULT_PENDING_OBJS_BATCH_SIZE; + + @CommandLine.Option( + names = {"--allow-duplicate-commit-traversal"}, + description = + "Allow traversal of the same commit more than once. This is disabled by default.") + private boolean allowDuplicateCommitTraversals = false; + + @CommandLine.Option( + names = {SCAN_OBJ_RATE}, + description = + "Allowed number of objects to scan during the 'purge' phase per second. Default is unlimited.") + private int purgeScanObjRatePerSecond = 0; + + @CommandLine.Option( + names = {PURGE_OBJ_RATE}, + description = + "Allowed number of objects to delete during the 'purge' phase per second. Default is unlimited.") + private int purgeDeleteObjRatePerSecond = 0; + + @CommandLine.Option( + names = {"--recent-objs-ids-filter-size"}, + description = + "Size of the filter to recognize recently processed objects. This helps to reduce effort, but should be kept to a reasonable number. Defaults to " + + DEFAULT_RECENT_OBJ_IDS_FILTER_SIZE + + '.') + private int recentObjIdsFilterSize = DEFAULT_RECENT_OBJ_IDS_FILTER_SIZE; + + @Override + public Integer call() { + warnOnInMemory(); + + if (!repositoryLogic(persist).repositoryExists()) { + spec.commandLine().getErr().println("Nessie repository does not exist"); + return EXIT_CODE_REPO_DOES_NOT_EXIST; + } + + CleanupParams cleanupParams = + CleanupParams.builder() + .expectedObjCount(expectedObjCount) + .falsePositiveProbability(falsePositiveProbability) + .allowedFalsePositiveProbability(allowedFalsePositiveProbability) + .resolveCommitRatePerSecond(resolveCommitRatePerSecond) + .resolveObjRatePerSecond(resolveObjRatePerSecond) + .pendingObjsBatchSize(pendingObjsBatchSize) + .allowDuplicateCommitTraversals(allowDuplicateCommitTraversals) + .purgeScanObjRatePerSecond(purgeScanObjRatePerSecond) + .purgeDeleteObjRatePerSecond(purgeDeleteObjRatePerSecond) + .recentObjIdsFilterSize(recentObjIdsFilterSize) + .dryRun(dryRun) + .build(); + + // timestamp in MICROseconds since epoch + var maxObjReferenced = persist.config().currentTimeMicros(); + if (objReferencedGrace != null) { + var millis = objReferencedGrace.toMillis(); + if (millis < 0) { + spec.commandLine() + .getErr() + .println("Argument for " + objReferencedGrace + " must not be negative!"); + return EXIT_CODE_GENERIC_ERROR; + } + maxObjReferenced -= MILLISECONDS.toMicros(millis); + } + + ResolveResult resolveResult; + while (true) { + var cleanup = createCleanup(cleanupParams); + var referencedObjectsContext = + cleanup.buildReferencedObjectsContext(persist, maxObjReferenced); + var referencedObjectsResolver = + cleanup.createReferencedObjectsResolver(referencedObjectsContext); + + try { + spec.commandLine() + .getOut() + .printf( + "Identifying referenced objects, processing %s commits per second, processing %s objects per second, expecting max %d objects, estimated context heap pressure: %.3f M%n", + resolveCommitRatePerSecond > 0 + ? Integer.toString(resolveCommitRatePerSecond) + : "unlimited", + resolveObjRatePerSecond > 0 + ? Integer.toString(resolveObjRatePerSecond) + : "unlimited", + expectedObjCount, + (double) referencedObjectsResolver.estimatedHeapPressure() / 1024L / 1024L); + resolveResult = referencedObjectsResolver.resolve(); + + spec.commandLine() + .getOut() + .printf( + "Finished identifying referenced objects after %s. Processed %d references, %d commits, %d objects, %d contents.%n", + resolveResult.stats().duration(), + resolveResult.stats().numReferences(), + resolveResult.stats().numUniqueCommits(), + resolveResult.stats().numObjs(), + resolveResult.stats().numContents()); + + var purgeObjects = cleanup.createPurgeObjects(resolveResult.purgeObjectsContext()); + + spec.commandLine() + .getOut() + .printf( + "%s unreferenced objects, referenced before %s, scanning %s objects per second, deleting %s objects per second, estimated context heap pressure: %.3f M%n", + dryRun ? "Dry-run cleanup" : "Purging", + ofEpochMilli(MICROSECONDS.toMillis(maxObjReferenced)) + .plus(maxObjReferenced % 1000, MICROS), + purgeScanObjRatePerSecond > 0 + ? Integer.toString(purgeScanObjRatePerSecond) + : "unlimited", + purgeDeleteObjRatePerSecond > 0 + ? Integer.toString(purgeDeleteObjRatePerSecond) + : "unlimited", + (double) purgeObjects.estimatedHeapPressure() / 1024L / 1024L); + + var purgeResult = purgeObjects.purge(); + + spec.commandLine() + .getOut() + .printf( + "Finished purging unreferenced objects after %s. Scanned %d objects, %d were deleted.%n", + resolveResult.stats().duration(), + purgeResult.stats().numScannedObjs(), + purgeResult.stats().numPurgedObjs()); + + break; + } catch (MustRestartWithBiggerFilterException e) { + var prev = cleanupParams.expectedObjCount(); + cleanupParams = cleanupParams.withIncreasedExpectedObjCount(); + + spec.commandLine() + .getErr() + .printf( + "Restarting identify referenced objects stage with increased expected object count from %d to %d. Please run this command the next time with the option '%s %d'%n", + prev, + cleanupParams.expectedObjCount(), + OBJ_COUNT, + cleanupParams.expectedObjCount()); + } + } + + return 0; + } +} diff --git a/tools/server-admin/src/main/java/org/projectnessie/tools/admin/cli/NessieServerAdminTool.java b/tools/server-admin/src/main/java/org/projectnessie/tools/admin/cli/NessieServerAdminTool.java index 568b1c17d86..744a095644a 100644 --- a/tools/server-admin/src/main/java/org/projectnessie/tools/admin/cli/NessieServerAdminTool.java +++ b/tools/server-admin/src/main/java/org/projectnessie/tools/admin/cli/NessieServerAdminTool.java @@ -29,6 +29,7 @@ subcommands = { NessieInfo.class, HelpCommand.class, + CleanupRepository.class, CheckContent.class, DeleteCatalogTasks.class, EraseRepository.class,