Skip to content

Commit

Permalink
wip - add logic to cleanup() to handle stranded metadata files
Browse files Browse the repository at this point in the history
  • Loading branch information
ggilmore committed Nov 28, 2022
1 parent e53fada commit 1ab8eb7
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 24 deletions.
52 changes: 38 additions & 14 deletions cmd/zoekt-sourcegraph-indexserver/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,38 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool)
}
}

// remove any Zoekt metadata files in the given dir that don't have an
// associated shard file
metaFiles, err := filepath.Glob(filepath.Join(indexDir, "*.meta"))
if err != nil {
log.Printf("failed to glob %q for stranded metadata files: %s", indexDir, err)
} else {
for _, metaFile := range metaFiles {
shard := strings.TrimSuffix(metaFile, ".meta")
_, err := os.Stat(shard)
if err == nil {
// metadata file has associated shard
continue
}

if !errors.Is(err, fs.ErrNotExist) {
log.Printf("failed to stat metadata file %q: %s", metaFile, err)
continue
}

// metadata doesn't have an associated shard file, remove the metadata file

err = os.Remove(metaFile)
if err != nil {
log.Printf("failed to remove stranded metadata file %q: %s", metaFile, err)
continue
} else {
log.Printf("removed stranded metadata file: %s", metaFile)
}

}
}

metricCleanupDuration.Observe(time.Since(start).Seconds())
}

Expand Down Expand Up @@ -563,24 +595,16 @@ func deleteShards(options *build.Options) error {
continue
}

err := os.Remove(shard)
paths, err := zoekt.IndexFilePaths(shard)
if err != nil {
return fmt.Errorf("deleting shard %q: %w", shard, err)
return fmt.Errorf("listing files for shard %q: %w", shard, err)
}

// remove the metadata file associated with the shard (if any)
metaFile := shard + ".meta"
if _, err := os.Stat(metaFile); err != nil {
if errors.Is(err, fs.ErrNotExist) {
continue
for _, p := range paths {
err := os.Remove(p)
if err != nil {
return fmt.Errorf("deleting %q: %w", p, err)
}

return fmt.Errorf("'stat'ing metadata file %q: %w", metaFile, err)
}

err = os.Remove(metaFile)
if err != nil {
return fmt.Errorf("deleting metadata file %q: %w", metaFile, err)
}
}

Expand Down
70 changes: 60 additions & 10 deletions cmd/zoekt-sourcegraph-indexserver/cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
)

func TestCleanup(t *testing.T) {

mk := func(name string, n int, mtime time.Time) shard {
return shard{
RepoID: fakeID(name),
Expand All @@ -28,6 +29,11 @@ func TestCleanup(t *testing.T) {
RepoTombstone: false,
}
}

mkMeta := func(name string, n int) string {
return fmt.Sprintf("%s_v%d.%05d.zoekt.meta", url.QueryEscape(name), 15, n)
}

// We don't use getShards so that we have two implementations of the same
// thing (ie pick up bugs in one)
glob := func(pattern string) []shard {
Expand Down Expand Up @@ -56,14 +62,16 @@ func TestCleanup(t *testing.T) {
recent := now.Add(-time.Hour)
old := now.Add(-25 * time.Hour)
cases := []struct {
name string
repos []string
index []shard
trash []shard
tmps []string

wantIndex []shard
wantTrash []shard
name string
repos []string
indexMetaFiles []string
index []shard
trash []shard
tmps []string

wantIndex []shard
wantIndexMetaFiles []string
wantTrash []shard
}{{
name: "noop",
}, {
Expand Down Expand Up @@ -96,6 +104,13 @@ func TestCleanup(t *testing.T) {
index: []shard{mk("foo", 0, recent), mk("bar", 0, recent)},
wantIndex: []shard{mk("foo", 0, recent)},
wantTrash: []shard{mk("bar", 0, now)},
}, {
name: "remove metafiles with no associated shards",
repos: []string{"foo", "bar"},
index: []shard{mk("foo", 0, recent), mk("bar", 0, recent)},
indexMetaFiles: []string{mkMeta("foo", 0), mkMeta("foo", 1), mkMeta("bar", 0)},
wantIndex: []shard{mk("foo", 0, recent), mk("bar", 0, recent)},
wantIndexMetaFiles: []string{mkMeta("foo", 0), mkMeta("bar", 0)},
}, {
name: "clean old .tmp files",
tmps: []string{"recent.tmp", "old.tmp"},
Expand Down Expand Up @@ -134,19 +149,54 @@ func TestCleanup(t *testing.T) {
t.Fatal(err)
}
}
for _, f := range tt.indexMetaFiles {
path := filepath.Join(dir, f)
if _, err := os.Create(path); err != nil {
t.Fatal(err)
}
}

var repoIDs []uint32
for _, name := range tt.repos {
repoIDs = append(repoIDs, fakeID(name))
}
cleanup(dir, repoIDs, now, false)

if d := cmp.Diff(tt.wantIndex, glob(filepath.Join(dir, "*.zoekt"))); d != "" {
actualIndexShards := glob(filepath.Join(dir, "*.zoekt"))

sort.Slice(actualIndexShards, func(i, j int) bool {
return actualIndexShards[i].Path < actualIndexShards[j].Path
})
sort.Slice(tt.wantIndex, func(i, j int) bool {
return tt.wantIndex[i].Path < tt.wantIndex[j].Path
})

if d := cmp.Diff(tt.wantIndex, actualIndexShards); d != "" {
t.Errorf("unexpected index (-want, +got):\n%s", d)
}
if d := cmp.Diff(tt.wantTrash, glob(filepath.Join(dir, ".trash", "*.zoekt"))); d != "" {

actualTrashShards := glob(filepath.Join(dir, ".trash", "*.zoekt"))

sort.Slice(actualTrashShards, func(i, j int) bool {
return actualTrashShards[i].Path < actualTrashShards[j].Path
})

sort.Slice(tt.wantTrash, func(i, j int) bool {
return tt.wantTrash[i].Path < tt.wantTrash[j].Path
})
if d := cmp.Diff(tt.wantTrash, actualTrashShards); d != "" {
t.Errorf("unexpected trash (-want, +got):\n%s", d)
}

actualIndexMetaFiles := globBase(filepath.Join(dir, "*.meta"))

sort.Strings(actualIndexMetaFiles)
sort.Strings(tt.wantIndexMetaFiles)

if d := cmp.Diff(tt.wantIndexMetaFiles, actualIndexMetaFiles, cmpopts.EquateEmpty()); d != "" {
t.Errorf("unexpected metadata files (-want, +got):\n%s", d)
}

if tmps := globBase(filepath.Join(dir, "*.tmp")); len(tmps) > 0 {
t.Errorf("unexpected tmps: %v", tmps)
}
Expand Down

0 comments on commit 1ab8eb7

Please sign in to comment.