indexserver: add debug endpoint for deleting repository shards #485
base: main
Changes from all commits: 2cb0213, 76a18b6, 828806c, e53fada, 1ab8eb7, 76ae04c, 4d58e64, 4d8cf41, 3270e03, 83f68e7, 4c2077d
@@ -1,7 +1,9 @@
package main

import (
	"errors"
	"fmt"
	"io/fs"
	"log"
	"os"
	"os/exec"
@@ -92,23 +94,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool)
		}
		log.Printf("removing shards for %v due to multiple repository names: %s", repo, strings.Join(paths, " "))

		// We may be in both normal and compound shards in this case. First
		// tombstone the compound shards so we don't just rm them.
		simple := shards[:0]
		for _, s := range shards {
			if shardMerging && maybeSetTombstone([]shard{s}, repo) {
				shardsLog(indexDir, "tombname", []shard{s})
			} else {
				simple = append(simple, s)
			}
		}

		if len(simple) == 0 {
			continue
		}

		removeAll(simple...)
		shardsLog(indexDir, "removename", simple)
		deleteOrTombstone(indexDir, repo, shardMerging, shards...)
	}

	// index: Move missing repos from trash into index
@@ -172,6 +158,38 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool)
		}
	}

	// remove any Zoekt metadata files in the given dir that don't have an
	// associated shard file
	metaFiles, err := filepath.Glob(filepath.Join(indexDir, "*.meta"))
	if err != nil {
		log.Printf("failed to glob %q for stranded metadata files: %s", indexDir, err)
	} else {
		for _, metaFile := range metaFiles {
			shard := strings.TrimSuffix(metaFile, ".meta")
			_, err := os.Stat(shard)
			if err == nil {
				// metadata file has associated shard
				continue
			}

			if !errors.Is(err, fs.ErrNotExist) {
				log.Printf("failed to stat metadata file %q: %s", metaFile, err)
				continue
			}

			// metadata doesn't have an associated shard file, remove the metadata file
			err = os.Remove(metaFile)
			if err != nil {
				log.Printf("failed to remove stranded metadata file %q: %s", metaFile, err)
				continue
			} else {
				log.Printf("removed stranded metadata file: %s", metaFile)
			}
		}
	}

	metricCleanupDuration.Observe(time.Since(start).Seconds())
}
@@ -292,11 +310,43 @@ func removeAll(shards ...shard) {
	// potential for a partial index for a repo. However, this should be
	// exceedingly rare due to it being a mix of partial failure on something in
	// trash + an admin re-adding a repository.
	for _, shard := range shards {

	if len(shards) == 0 {
		return
	}

	// Ensure that the paths are in reverse sorted order to ensure that Zoekt's repository <-> shard matching logic
	// works correctly + ensure that we don't leave behind partial state.
	//
	// Example: - repoA_v16.00002.zoekt
	//          - repoA_v16.00001.zoekt
	//          - repoA_v16.00000.zoekt
	//
	// zoekt-indexserver checks whether it has indexed "repoA" by first checking to see if the 0th shard
	// is present (repoA_v16.00000.zoekt).
	// - If it's present, then it gathers all rest of the shards names in ascending order (...00001.zoekt, ...00002.zoekt).
	// - If it's missing, then zoekt assumes that it never indexed "repoA" (the remaining data from shards 1 & 2 is effectively invisible)
	//
	// If this function were to crash while deleting repoA, and we only deleted the 0th shard, then:
	// - zoekt would think that there is no data for that repository (despite the partial data from shards 1 and 2)
	// - it's possible for zoekt to show inconsistent state when re-indexing the repository (zoekt incorrectly
	//   associates the data from shards 1 and 2 with the "new" shard 0 data (from a newer commit))
	//
	// Deleting shards in reverse sorted order (2 -> 1 -> 0) always ensures that we don't leave an inconsistent
	// state behind even if we crash.
Comment on lines +335 to +336:

but if we do crash we will have partial state and be none the wiser anyways. So I think this is just extra complexity without making us any safer than before. In fact I'd argue deleting 0 first is the best bet, since that is what we actually check in other parts. So if that is missing then we end up deleting the other ones. Then adding a check in cleanup to remove stranded indexes would make sense. At the end of the day we are relying on the fact that os.Remove is super fast. Not ideal, and really we should introduce some other way of being atomic.

Maybe we can add an extra file that acts as a tombstone? for a given repo if
	sortedShards := append([]shard{}, shards...)

	sort.Slice(sortedShards, func(i, j int) bool {
		return sortedShards[i].Path > sortedShards[j].Path
	})

	for _, shard := range sortedShards {
		paths, err := zoekt.IndexFilePaths(shard.Path)
		if err != nil {
			debug.Printf("failed to remove shard %s: %v", shard.Path, err)
		}

		for _, p := range paths {
			if err := os.Remove(p); err != nil {
				debug.Printf("failed to remove shard file %s: %v", p, err)
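To make the marker-file idea from the review thread above concrete, here is a minimal sketch, assuming it would live in the same package as removeAll. The function name, the ".deleting" suffix, and the error handling are assumptions, not code from this PR; the point is only that a crash mid-delete leaves a marker behind that a later cleanup pass could detect and act on.

```go
// Sketch only: write a marker file before deleting any shard files so that a
// crash part-way through leaves evidence for a later cleanup pass. The name
// and the ".deleting" convention are hypothetical.
func removeAllWithMarker(shards ...shard) error {
	if len(shards) == 0 {
		return nil
	}

	// One marker per delete batch, keyed off the first shard's path.
	marker := shards[0].Path + ".deleting"
	if err := os.WriteFile(marker, nil, 0o600); err != nil {
		return err
	}

	for _, s := range shards {
		paths, err := zoekt.IndexFilePaths(s.Path)
		if err != nil {
			return err
		}
		for _, p := range paths {
			if err := os.Remove(p); err != nil && !errors.Is(err, fs.ErrNotExist) {
				return err
			}
		}
	}

	// The marker is only removed once every shard file is gone; cleanup could
	// treat any leftover marker as "finish deleting this repo's shards".
	return os.Remove(marker)
}
```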
@@ -515,3 +565,26 @@ func removeTombstones(fn string) ([]*zoekt.Repository, error) {
	}
	return tombstones, nil
}

// deleteOrTombstone deletes the provided shards in indexDir that are associated with
// the given repoID.
//
// If one of the provided shards is a compound shard and the repository is contained within it,
// the repository is tombstoned instead.
func deleteOrTombstone(indexDir string, repoID uint32, shardMerging bool, shards ...shard) {
I see you mentioned you didn't like that we have to pass in indexDir and shardMerging to this func. Maybe instead it should be part of some helper struct that holds that state. Minor nit: I'd make

Got it...I'll play around with passing around some sort of helper to see if that's less awkward. Something like:
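One possible shape for such a helper, sketched here with assumed field and method names; the body simply mirrors the deleteOrTombstone code in this diff and is not the author's actual follow-up.

```go
// shardCleaner bundles the settings that deleteOrTombstone currently takes as
// parameters. Sketch only; not code from this PR.
type shardCleaner struct {
	indexDir     string
	shardMerging bool
}

// deleteOrTombstone mirrors the package-level function, reading the shared
// settings from the struct instead of its argument list.
func (c *shardCleaner) deleteOrTombstone(repoID uint32, shards ...shard) {
	var simple []shard
	for _, s := range shards {
		if c.shardMerging && maybeSetTombstone([]shard{s}, repoID) {
			shardsLog(c.indexDir, "tombname", []shard{s})
		} else {
			simple = append(simple, s)
		}
	}

	if len(simple) == 0 {
		return
	}

	removeAll(simple...)
	shardsLog(c.indexDir, "removename", simple)
}
```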
	var simple []shard
I see you got rid of the filtering hack, I guess just because now that it is a func it is safer that way?

By filtering hack I assume you mean the
	for _, s := range shards {
		if shardMerging && maybeSetTombstone([]shard{s}, repoID) {
			shardsLog(indexDir, "tombname", []shard{s})
		} else {
			simple = append(simple, s)
		}
	}

	if len(simple) == 0 {
		return
	}

	removeAll(simple...)
	shardsLog(indexDir, "removename", simple)
}
is this unrelated cleanup? IE maybe should be another PR?
No, I added this functionality since we switched the implementation. Before, the implementation made sure to delete the metadata file before its associated shard file. Since we switched to using zoekt.IndexFilePaths that order isn't guaranteed anymore (leaving open the possibility of a metadata file not having an associated shard if we delete the shard and then crash). I added this additional logic to cleanup so that we would have a background process that would reap those "stranded" files.
I am happy to pull this bit into a separate PR though.
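For context, here is a minimal sketch of how the old ".meta before shard" deletion order could be preserved on top of zoekt.IndexFilePaths, if that were preferred over the background reaping added above. The helper name is hypothetical and this is not part of the PR.

```go
// removeShardFilesMetaFirst deletes a shard's files, always removing the
// ".meta" file before the shard data itself, so a crash can leave a shard
// without metadata but never a metadata file without its shard. Sketch only.
func removeShardFilesMetaFirst(shardPath string) {
	paths, err := zoekt.IndexFilePaths(shardPath)
	if err != nil {
		debug.Printf("failed to list files for shard %s: %v", shardPath, err)
		return
	}

	// Order the paths so that ".meta" files sort first.
	sort.Slice(paths, func(i, j int) bool {
		return strings.HasSuffix(paths[i], ".meta") && !strings.HasSuffix(paths[j], ".meta")
	})

	for _, p := range paths {
		if err := os.Remove(p); err != nil {
			debug.Printf("failed to remove shard file %s: %v", p, err)
		}
	}
}
```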