-
Notifications
You must be signed in to change notification settings - Fork 132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Async Deletion of Previous Metadata and Statistics Files #312
base: main
Are you sure you want to change the base?
Changes from 34 commits
d0cd456
26e03ac
0f8e8f4
1b525de
e8b26d2
2ee6dee
806f46d
4f1d3c9
0a77bfa
47dc60a
9d835b3
40c6147
f354d1c
88c6651
af3efab
278ab7e
ed30fb0
05c3dd9
49dbe68
8eea50d
d9804e6
54511de
56ba4f2
e92852e
27ea1b3
4d1b68b
eb533d7
47f760f
988e530
4965d5c
5f81483
097189c
651ece0
16bb5fe
d276ae6
187b47e
ba5c47c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,10 +42,13 @@ | |
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* {@link TaskHandler} responsible for deleting all of the files in a manifest and the manifest | ||
* itself. Since data files may be present in multiple manifests across different snapshots, we | ||
* assume a data file that doesn't exist is missing because it was already deleted by another task. | ||
* {@link TaskHandler} responsible for deleting table files: 1. Manifest files: It contains all the | ||
* files in a manifest and the manifest itself. Since data files may be present in multiple | ||
* manifests across different snapshots, we assume a data file that doesn't exist is missing because | ||
* it was already deleted by another task. 2. Table metadata files: It contains previous metadata | ||
* and statistics files, which are grouped and deleted in batch | ||
*/ | ||
// TODO: Rename this class since we introducing metadata cleanup here | ||
public class ManifestFileCleanupTaskHandler implements TaskHandler { | ||
public static final int MAX_ATTEMPTS = 3; | ||
public static final int FILE_DELETION_RETRY_MILLIS = 100; | ||
|
@@ -68,58 +71,110 @@ public boolean canHandleTask(TaskEntity task) { | |
@Override | ||
public boolean handleTask(TaskEntity task) { | ||
ManifestCleanupTask cleanupTask = task.readData(ManifestCleanupTask.class); | ||
ManifestFile manifestFile = decodeManifestData(cleanupTask.getManifestFileData()); | ||
TableIdentifier tableId = cleanupTask.getTableId(); | ||
try (FileIO authorizedFileIO = fileIOSupplier.apply(task)) { | ||
|
||
// if the file doesn't exist, we assume that another task execution was successful, but failed | ||
// to drop the task entity. Log a warning and return success | ||
if (!TaskUtils.exists(manifestFile.path(), authorizedFileIO)) { | ||
if (cleanupTask.getManifestFileData() != null) { | ||
ManifestFile manifestFile = decodeManifestData(cleanupTask.getManifestFileData()); | ||
return cleanUpManifestFile(manifestFile, authorizedFileIO, tableId); | ||
} else if (cleanupTask.getMetadataFiles() != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't want to belabor this, but I don't want to overload this class with logic for handling many different file types. As I mentioned, the In order to avoid bogging down this PR too much, can we just add a second task type for the metadata files and predicate the logic here on the task type rather than testing the presence of |
||
return cleanUpMetadataFiles(cleanupTask.getMetadataFiles(), authorizedFileIO, tableId); | ||
} else { | ||
LOGGER | ||
.atWarn() | ||
.addKeyValue("manifestFile", manifestFile.path()) | ||
.addKeyValue("tableId", tableId) | ||
.log("Manifest cleanup task scheduled, but manifest file doesn't exist"); | ||
.log("Cleanup task scheduled, but input file doesn't exist"); | ||
return true; | ||
} | ||
} | ||
} | ||
|
||
ManifestReader<DataFile> dataFiles = ManifestFiles.read(manifestFile, authorizedFileIO); | ||
List<CompletableFuture<Void>> dataFileDeletes = | ||
StreamSupport.stream( | ||
Spliterators.spliteratorUnknownSize(dataFiles.iterator(), Spliterator.IMMUTABLE), | ||
false) | ||
.map( | ||
file -> | ||
tryDelete( | ||
tableId, authorizedFileIO, manifestFile, file.path().toString(), null, 1)) | ||
.toList(); | ||
LOGGER.debug( | ||
"Scheduled {} data files to be deleted from manifest {}", | ||
dataFileDeletes.size(), | ||
manifestFile.path()); | ||
try { | ||
// wait for all data files to be deleted, then wait for the manifest itself to be deleted | ||
CompletableFuture.allOf(dataFileDeletes.toArray(CompletableFuture[]::new)) | ||
.thenCompose( | ||
(v) -> { | ||
LOGGER | ||
.atInfo() | ||
.addKeyValue("manifestFile", manifestFile.path()) | ||
.log("All data files in manifest deleted - deleting manifest"); | ||
return tryDelete( | ||
tableId, authorizedFileIO, manifestFile, manifestFile.path(), null, 1); | ||
}) | ||
.get(); | ||
return true; | ||
} catch (InterruptedException e) { | ||
LOGGER.error( | ||
"Interrupted exception deleting data files from manifest {}", manifestFile.path(), e); | ||
throw new RuntimeException(e); | ||
} catch (ExecutionException e) { | ||
LOGGER.error("Unable to delete data files from manifest {}", manifestFile.path(), e); | ||
return false; | ||
} | ||
private boolean cleanUpManifestFile( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry for the large number of changes here, but don’t worry—it’s mainly because I refactored the deletion logic for the manifest and all its data into a new method; no other changes were made in lines 91-135. |
||
ManifestFile manifestFile, FileIO fileIO, TableIdentifier tableId) { | ||
// if the file doesn't exist, we assume that another task execution was successful, but | ||
// failed to drop the task entity. Log a warning and return success | ||
if (!TaskUtils.exists(manifestFile.path(), fileIO)) { | ||
LOGGER | ||
.atWarn() | ||
.addKeyValue("manifestFile", manifestFile.path()) | ||
.addKeyValue("tableId", tableId) | ||
.log("Manifest cleanup task scheduled, but manifest file doesn't exist"); | ||
return true; | ||
} | ||
|
||
ManifestReader<DataFile> dataFiles = ManifestFiles.read(manifestFile, fileIO); | ||
List<CompletableFuture<Void>> dataFileDeletes = | ||
StreamSupport.stream( | ||
Spliterators.spliteratorUnknownSize(dataFiles.iterator(), Spliterator.IMMUTABLE), | ||
false) | ||
.map(file -> tryDelete(tableId, fileIO, manifestFile, file.path().toString(), null, 1)) | ||
.toList(); | ||
LOGGER.debug( | ||
"Scheduled {} data files to be deleted from manifest {}", | ||
dataFileDeletes.size(), | ||
manifestFile.path()); | ||
try { | ||
// wait for all data files to be deleted, then wait for the manifest itself to be deleted | ||
CompletableFuture.allOf(dataFileDeletes.toArray(CompletableFuture[]::new)) | ||
.thenCompose( | ||
(v) -> { | ||
LOGGER | ||
.atInfo() | ||
.addKeyValue("manifestFile", manifestFile.path()) | ||
.log("All data files in manifest deleted - deleting manifest"); | ||
return tryDelete(tableId, fileIO, manifestFile, manifestFile.path(), null, 1); | ||
}) | ||
.get(); | ||
return true; | ||
} catch (InterruptedException e) { | ||
LOGGER.error( | ||
"Interrupted exception deleting data files from manifest {}", manifestFile.path(), e); | ||
throw new RuntimeException(e); | ||
} catch (ExecutionException e) { | ||
LOGGER.error("Unable to delete data files from manifest {}", manifestFile.path(), e); | ||
return false; | ||
} | ||
} | ||
|
||
private boolean cleanUpMetadataFiles( | ||
List<String> metadataFiles, FileIO fileIO, TableIdentifier tableId) { | ||
List<String> validFiles = | ||
metadataFiles.stream().filter(file -> TaskUtils.exists(file, fileIO)).toList(); | ||
if (validFiles.isEmpty()) { | ||
LOGGER | ||
.atWarn() | ||
.addKeyValue("metadataFiles", metadataFiles.toString()) | ||
.addKeyValue("tableId", tableId) | ||
.log("Table metadata cleanup task scheduled, but the none of the file in batch exists"); | ||
return true; | ||
} | ||
if (validFiles.size() < metadataFiles.size()) { | ||
List<String> missingFiles = | ||
metadataFiles.stream().filter(file -> !TaskUtils.exists(file, fileIO)).toList(); | ||
LOGGER | ||
.atWarn() | ||
.addKeyValue("metadataFiles", metadataFiles.toString()) | ||
.addKeyValue("missingFiles", missingFiles) | ||
.addKeyValue("tableId", tableId) | ||
.log( | ||
"Table metadata cleanup task scheduled, but {} files in the batch are missing", | ||
missingFiles.size()); | ||
} | ||
|
||
// Schedule the deletion for each file asynchronously | ||
List<CompletableFuture<Void>> deleteFutures = | ||
validFiles.stream().map(file -> tryDelete(tableId, fileIO, null, file, null, 1)).toList(); | ||
|
||
try { | ||
// Wait for all delete operations to finish | ||
CompletableFuture<Void> allDeletes = | ||
CompletableFuture.allOf(deleteFutures.toArray(new CompletableFuture[0])); | ||
allDeletes.join(); | ||
} catch (Exception e) { | ||
LOGGER.error("Exception detected during metadata file deletion", e); | ||
return false; | ||
} | ||
|
||
return true; | ||
} | ||
|
||
private static ManifestFile decodeManifestData(String manifestFileData) { | ||
|
@@ -134,16 +189,16 @@ private CompletableFuture<Void> tryDelete( | |
TableIdentifier tableId, | ||
FileIO fileIO, | ||
ManifestFile manifestFile, | ||
String dataFile, | ||
String file, | ||
Throwable e, | ||
int attempt) { | ||
if (e != null && attempt <= MAX_ATTEMPTS) { | ||
LOGGER | ||
.atWarn() | ||
.addKeyValue("dataFile", dataFile) | ||
.addKeyValue("file", file) | ||
.addKeyValue("attempt", attempt) | ||
.addKeyValue("error", e.getMessage()) | ||
.log("Error encountered attempting to delete data file"); | ||
.log("Error encountered attempting to delete file"); | ||
} | ||
if (attempt > MAX_ATTEMPTS && e != null) { | ||
return CompletableFuture.failedFuture(e); | ||
|
@@ -155,27 +210,27 @@ private CompletableFuture<Void> tryDelete( | |
// file's existence, but then it is deleted before we have a chance to | ||
// send the delete request. In such a case, we <i>should</i> retry | ||
// and find | ||
if (TaskUtils.exists(dataFile, fileIO)) { | ||
fileIO.deleteFile(dataFile); | ||
if (TaskUtils.exists(file, fileIO)) { | ||
fileIO.deleteFile(file); | ||
} else { | ||
LOGGER | ||
.atInfo() | ||
.addKeyValue("dataFile", dataFile) | ||
.addKeyValue("manifestFile", manifestFile.path()) | ||
.addKeyValue("file", file) | ||
.addKeyValue("manifestFile", manifestFile != null ? manifestFile.path() : "") | ||
.addKeyValue("tableId", tableId) | ||
.log("Manifest cleanup task scheduled, but data file doesn't exist"); | ||
.log("table file cleanup task scheduled, but data file doesn't exist"); | ||
} | ||
}, | ||
executorService) | ||
.exceptionallyComposeAsync( | ||
newEx -> { | ||
LOGGER | ||
.atWarn() | ||
.addKeyValue("dataFile", dataFile) | ||
.addKeyValue("tableIdentifer", tableId) | ||
.addKeyValue("manifestFile", manifestFile.path()) | ||
.addKeyValue("dataFile", file) | ||
.addKeyValue("tableIdentifier", tableId) | ||
.addKeyValue("manifestFile", manifestFile != null ? manifestFile.path() : "") | ||
.log("Exception caught deleting data file from manifest", newEx); | ||
return tryDelete(tableId, fileIO, manifestFile, dataFile, newEx, attempt + 1); | ||
return tryDelete(tableId, fileIO, manifestFile, file, newEx, attempt + 1); | ||
}, | ||
CompletableFuture.delayedExecutor( | ||
FILE_DELETION_RETRY_MILLIS, TimeUnit.MILLISECONDS, executorService)); | ||
|
@@ -185,12 +240,18 @@ private CompletableFuture<Void> tryDelete( | |
public static final class ManifestCleanupTask { | ||
private TableIdentifier tableId; | ||
private String manifestFileData; | ||
private List<String> metadataFiles; | ||
|
||
public ManifestCleanupTask(TableIdentifier tableId, String manifestFileData) { | ||
this.tableId = tableId; | ||
this.manifestFileData = manifestFileData; | ||
} | ||
|
||
public ManifestCleanupTask(TableIdentifier tableId, List<String> metadataFiles) { | ||
this.tableId = tableId; | ||
this.metadataFiles = metadataFiles; | ||
} | ||
|
||
public ManifestCleanupTask() {} | ||
|
||
public TableIdentifier getTableId() { | ||
|
@@ -209,17 +270,26 @@ public void setManifestFileData(String manifestFileData) { | |
this.manifestFileData = manifestFileData; | ||
} | ||
|
||
public List<String> getMetadataFiles() { | ||
return metadataFiles; | ||
} | ||
|
||
public void setMetadataFiles(List<String> metadataFiles) { | ||
this.metadataFiles = metadataFiles; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object object) { | ||
if (this == object) return true; | ||
if (!(object instanceof ManifestCleanupTask that)) return false; | ||
return Objects.equals(tableId, that.tableId) | ||
&& Objects.equals(manifestFileData, that.manifestFileData); | ||
&& Objects.equals(manifestFileData, that.manifestFileData) | ||
&& Objects.equals(metadataFiles, that.metadataFiles); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(tableId, manifestFileData); | ||
return Objects.hash(tableId, manifestFileData, metadataFiles); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renaming this task will trigger lots of related changes. If a rename is needed, we may want to handle it in a separate PR to avoid too many changes (leave a TODO here).