diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 18c63fbe7bb1..a726040a1473 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1146,6 +1146,10 @@ acceptedBreaks: \ org.apache.iceberg.TableMetadata)" justification: "Removing deprecated code" "1.7.0": + org.apache.iceberg:iceberg-api: + - code: "java.method.addedToInterface" + new: "method org.apache.iceberg.ExpireSnapshots org.apache.iceberg.ExpireSnapshots::keepAtMost(int)" + justification: "Add new method" org.apache.iceberg:iceberg-core: - code: "java.method.removed" old: "method org.apache.iceberg.deletes.PositionDeleteIndex\ diff --git a/api/src/main/java/org/apache/iceberg/ExpireSnapshots.java b/api/src/main/java/org/apache/iceberg/ExpireSnapshots.java index f6524a1d4fba..e503940c5ff9 100644 --- a/api/src/main/java/org/apache/iceberg/ExpireSnapshots.java +++ b/api/src/main/java/org/apache/iceberg/ExpireSnapshots.java @@ -50,11 +50,24 @@ public interface ExpireSnapshots extends PendingUpdate> { /** * Expires all snapshots older than the given timestamp. * + *

A snapshot kept by this method can still be expired by {@link #keepAtMost(int)}. + * * @param timestampMillis a long timestamp, as returned by {@link System#currentTimeMillis()} * @return this for method chaining */ ExpireSnapshots expireOlderThan(long timestampMillis); + /** + * Keeps at most {@code numSnapshots} ancestors of the current snapshot and expires older + * ancestors. + * + *

A snapshot kept by this method can still be expired by {@link #expireOlderThan(long)}. + * + * @param numSnapshots the number of snapshots to keep at most + * @return this for method chaining + */ + ExpireSnapshots keepAtMost(int numSnapshots); + /** * Retains the most recent ancestors of the current snapshot. * diff --git a/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java b/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java index 7558ea7d8a3e..241a7e38fb1d 100644 --- a/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java +++ b/core/src/main/java/org/apache/iceberg/RemoveSnapshots.java @@ -30,6 +30,8 @@ import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT; import static org.apache.iceberg.TableProperties.MAX_REF_AGE_MS; import static org.apache.iceberg.TableProperties.MAX_REF_AGE_MS_DEFAULT; +import static org.apache.iceberg.TableProperties.MAX_SNAPSHOTS_TO_KEEP; +import static org.apache.iceberg.TableProperties.MAX_SNAPSHOTS_TO_KEEP_DEFAULT; import static org.apache.iceberg.TableProperties.MAX_SNAPSHOT_AGE_MS; import static org.apache.iceberg.TableProperties.MAX_SNAPSHOT_AGE_MS_DEFAULT; import static org.apache.iceberg.TableProperties.MIN_SNAPSHOTS_TO_KEEP; @@ -80,6 +82,7 @@ public void accept(String file) { private TableMetadata base; private long defaultExpireOlderThan; private int defaultMinNumSnapshots; + private int defaultMaxNumSnapshots; private Consumer deleteFunc = defaultDelete; private ExecutorService deleteExecutorService = DEFAULT_DELETE_EXECUTOR_SERVICE; private ExecutorService planExecutorService = ThreadPools.getWorkerPool(); @@ -102,6 +105,9 @@ public void accept(String file) { this.defaultMinNumSnapshots = PropertyUtil.propertyAsInt( base.properties(), MIN_SNAPSHOTS_TO_KEEP, MIN_SNAPSHOTS_TO_KEEP_DEFAULT); + this.defaultMaxNumSnapshots = + PropertyUtil.propertyAsInt( + base.properties(), MAX_SNAPSHOTS_TO_KEEP, MAX_SNAPSHOTS_TO_KEEP_DEFAULT); this.defaultMaxRefAgeMs = PropertyUtil.propertyAsLong(base.properties(), MAX_REF_AGE_MS, MAX_REF_AGE_MS_DEFAULT); @@ -131,6 +137,16 @@ public ExpireSnapshots expireOlderThan(long timestampMillis) { return this; } + @Override + public ExpireSnapshots keepAtMost(int numSnapshots) { + Preconditions.checkArgument( + 1 <= numSnapshots, + "Number of snapshots to keep at most must be at least 1, cannot be: %s", + numSnapshots); + this.defaultMaxNumSnapshots = numSnapshots; + return this; + } + @Override public ExpireSnapshots retainLast(int numSnapshots) { Preconditions.checkArgument( @@ -245,9 +261,13 @@ private Set computeAllBranchSnapshotsToRetain(Collection refs ref.maxSnapshotAgeMs() != null ? now - ref.maxSnapshotAgeMs() : defaultExpireOlderThan; int minSnapshotsToKeep = ref.minSnapshotsToKeep() != null ? ref.minSnapshotsToKeep() : defaultMinNumSnapshots; + int maxSnapshotsToKeep = Math.max(defaultMaxNumSnapshots, minSnapshotsToKeep); branchSnapshotsToRetain.addAll( computeBranchSnapshotsToRetain( - ref.snapshotId(), expireSnapshotsOlderThan, minSnapshotsToKeep)); + ref.snapshotId(), + expireSnapshotsOlderThan, + minSnapshotsToKeep, + maxSnapshotsToKeep)); } } @@ -255,11 +275,16 @@ private Set computeAllBranchSnapshotsToRetain(Collection refs } private Set computeBranchSnapshotsToRetain( - long snapshot, long expireSnapshotsOlderThan, int minSnapshotsToKeep) { + long snapshot, + long expireSnapshotsOlderThan, + int minSnapshotsToKeep, + int maxSnapshotsToKeep) { Set idsToRetain = Sets.newHashSet(); for (Snapshot ancestor : SnapshotUtil.ancestorsOf(snapshot, base::snapshot)) { - if (idsToRetain.size() < minSnapshotsToKeep - || ancestor.timestampMillis() >= expireSnapshotsOlderThan) { + int retainSize = idsToRetain.size(); + if (retainSize < minSnapshotsToKeep + || (retainSize < maxSnapshotsToKeep + && ancestor.timestampMillis() >= expireSnapshotsOlderThan)) { idsToRetain.add(ancestor.snapshotId()); } else { return idsToRetain; diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index c137bcd3a2c3..b262684cb01e 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -349,6 +349,9 @@ private TableProperties() {} public static final String MIN_SNAPSHOTS_TO_KEEP = "history.expire.min-snapshots-to-keep"; public static final int MIN_SNAPSHOTS_TO_KEEP_DEFAULT = 1; + public static final String MAX_SNAPSHOTS_TO_KEEP = "history.expire.max-snapshots-to-keep"; + public static final int MAX_SNAPSHOTS_TO_KEEP_DEFAULT = Integer.MAX_VALUE; + public static final String MAX_REF_AGE_MS = "history.expire.max-ref-age-ms"; public static final long MAX_REF_AGE_MS_DEFAULT = Long.MAX_VALUE;