diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index 0eaf96248..5bb1fe7a1 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -6,10 +6,12 @@ import ( "bytes" "context" "encoding/json" + "errors" "flag" "fmt" "html/template" "io" + "io/fs" "log" "math" "math/rand" @@ -814,10 +816,10 @@ func deleteShards(options *build.Options) error { // is present (repoA_v16.00000.zoekt). If it's present, then it gathers all rest of the shards names in ascending order // (...00001.zoekt, ...00002.zoekt). If it's missing, then zoekt assumes that it never indexed "repoA". // - // If this function were to crash while deleting repoA and we only deleted the 0th shard, then shard's 1 & 2 would never + // If this function were to crash while deleting repoA, and we only deleted the 0th shard, then shards 1 & 2 would never // be cleaned up by Zoekt indexserver (since the 0th shard is the only shard that's tested). // - // Ensuring that we delete shards in reverse sorted order (2 -> 1 -> 0) always ensures that we don't leave an inconsistent + // Deleting shards in reverse sorted order (2 -> 1 -> 0) always ensures that we don't leave an inconsistent // state behind even if we crash. 
sort.Slice(shardPaths, func(i, j int) bool { @@ -840,17 +842,24 @@ func deleteShards(options *build.Options) error { continue } + err := os.Remove(shard) + if err != nil { + return fmt.Errorf("deleting shard %q: %w", shard, err) + } + + // remove the metadata file associated with the shard (if any) metaFile := shard + ".meta" - if _, err := os.Stat(metaFile); err == nil { - err := os.Remove(metaFile) - if err != nil { - return fmt.Errorf("deleting metadata file %q: %w", metaFile, err) + if _, err := os.Stat(metaFile); err != nil { + if errors.Is(err, fs.ErrNotExist) { + continue } + + return fmt.Errorf("'stat'ing metadata file %q: %w", metaFile, err) } - err := os.Remove(shard) + err = os.Remove(metaFile) if err != nil { - return fmt.Errorf("deleting shard %q: %w", shard, err) + return fmt.Errorf("deleting metadata file %q: %w", metaFile, err) } } diff --git a/cmd/zoekt-sourcegraph-indexserver/main_test.go b/cmd/zoekt-sourcegraph-indexserver/main_test.go index 5ec4f2481..e52d962e9 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/main_test.go @@ -4,18 +4,23 @@ import ( "context" "flag" "fmt" - sglog "github.com/sourcegraph/log" - "github.com/sourcegraph/log/logtest" "io" "log" "net/http" "net/http/httptest" "net/url" "os" + "path/filepath" + "sort" + "strconv" "strings" "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + sglog "github.com/sourcegraph/log" + "github.com/sourcegraph/log/logtest" + "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt" ) @@ -88,6 +93,127 @@ func TestListRepoIDs(t *testing.T) { } } +func TestDeleteShards(t *testing.T) { + remainingRepoA := zoekt.Repository{ID: 1, Name: "A"} + remainingRepoB := zoekt.Repository{ID: 2, Name: "B"} + repositoryToDelete := zoekt.Repository{ID: 99, Name: "DELETE_ME"} + + t.Run("delete repository from set of normal shards", func(t *testing.T) { + indexDir := t.TempDir() + + // map of repoID -> list of associated 
shard paths + metadata paths + shardMap := make(map[uint32][]string) + + // setup: create shards for each repository, and populate the shard map + for _, r := range []zoekt.Repository{ + remainingRepoA, + remainingRepoB, + repositoryToDelete, + } { + shards := createTestNormalShard(t, indexDir, r, 3) + + for _, shard := range shards { + // create stub meta file + metaFile := shard + ".meta" + f, err := os.Create(metaFile) + if err != nil { + t.Fatalf("creating metadata file %q: %s", metaFile, err) + } + + f.Close() + + shardMap[r.ID] = append(shardMap[r.ID], shard, metaFile) + } + } + + // run test: delete repository + options := &build.Options{ + IndexDir: indexDir, + RepositoryDescription: repositoryToDelete, + } + options.SetDefaults() + + err := deleteShards(options) + if err != nil { + t.Errorf("unexpected error when deleting shards: %s", err) + } + + // run assertions: gather all the shards + meta files that remain and + // check to see that only the files associated with the "remaining" repositories + // are present + var actualShardFiles []string + + for _, pattern := range []string{"*.zoekt", "*.meta"} { + files, err := filepath.Glob(filepath.Join(indexDir, pattern)) + if err != nil { + t.Fatalf("globbing indexDir: %s", err) + } + + actualShardFiles = append(actualShardFiles, files...) + } + + var expectedShardFiles []string + expectedShardFiles = append(expectedShardFiles, shardMap[remainingRepoA.ID]...) + expectedShardFiles = append(expectedShardFiles, shardMap[remainingRepoB.ID]...) 
+ + sort.Strings(actualShardFiles) + sort.Strings(expectedShardFiles) + + if diff := cmp.Diff(expectedShardFiles, actualShardFiles); diff != "" { + t.Errorf("unexpected diff in list of shard files (-want +got):\n%s", diff) + } + }) + + t.Run("delete repository from compound shard", func(t *testing.T) { + indexDir := t.TempDir() + + // setup: enable shard merging for compound shards + t.Setenv("SRC_ENABLE_SHARD_MERGING", "1") + + // setup: create compound shard with all repositories + repositories := []zoekt.Repository{remainingRepoA, remainingRepoB, repositoryToDelete} + shard := createTestCompoundShard(t, indexDir, repositories) + + // run test: delete repository + options := &build.Options{ + IndexDir: indexDir, + RepositoryDescription: repositoryToDelete, + } + options.SetDefaults() + + err := deleteShards(options) + if err != nil { + t.Errorf("unexpected error when deleting shards: %s", err) + } + + // verify: read the compound shard, and ensure that only + // the repositories that we expect are in the shard (and the deleted one has been tombstoned) + actualRepositories, _, err := zoekt.ReadMetadataPathAlive(shard) + if err != nil { + t.Fatalf("reading repository metadata from shard: %s", err) + } + + expectedRepositories := []*zoekt.Repository{&remainingRepoA, &remainingRepoB} + + sort.Slice(actualRepositories, func(i, j int) bool { + return actualRepositories[i].ID < actualRepositories[j].ID + }) + + sort.Slice(expectedRepositories, func(i, j int) bool { + return expectedRepositories[i].ID < expectedRepositories[j].ID + }) + + opts := []cmp.Option{ + cmpopts.IgnoreUnexported(zoekt.Repository{}), + cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions", "HasSymbols"), + cmpopts.EquateEmpty(), + } + if diff := cmp.Diff(expectedRepositories, actualRepositories, opts...); diff != "" { + t.Errorf("unexpected diff in list of repositories (-want +got):\n%s", diff) + } + }) +} + func TestListRepoIDs_Error(t *testing.T) { msg := "deadbeaf deadbeaf" ts := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -124,6 +250,109 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +func createTestNormalShard(t *testing.T, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *build.Options)) []string { + t.Helper() + + if err := os.MkdirAll(filepath.Dir(indexDir), 0700); err != nil { + t.Fatal(err) + } + + o := build.Options{ + IndexDir: indexDir, + RepositoryDescription: r, + ShardMax: 75, // create a new shard every 75 bytes + } + o.SetDefaults() + + for _, fn := range optFns { + fn(&o) + } + + b, err := build.NewBuilder(o) + if err != nil { + t.Fatalf("NewBuilder: %v", err) + } + + if numShards == 0 { + // We have to make at least 1 shard. + numShards = 1 + } + + for i := 0; i < numShards; i++ { + // Create entries (file + contents) that are ~100 bytes each. + // This (along with our shardMax setting of 75 bytes) means that each shard + // will contain at most one of these. + fileName := strconv.Itoa(i) + document := zoekt.Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} + for _, branch := range o.RepositoryDescription.Branches { + document.Branches = append(document.Branches, branch.Name) + } + + err := b.Add(document) + if err != nil { + t.Fatalf("failed to add file %q to builder: %s", fileName, err) + } + } + + if err := b.Finish(); err != nil { + t.Fatalf("Finish: %v", err) + } + + return o.FindAllShards() +} + +func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt.Repository, optFns ...func(options *build.Options)) string { + t.Helper() + + var shardNames []string + + for _, r := range repositories { + // create an isolated scratch space to store normal shards for this repository + scratchDir := t.TempDir() + + // create shards that'll be merged later + createTestNormalShard(t, scratchDir, r, 1, optFns...) 
+ + // discover file names for all the normal shards we created + // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse + shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt")) + if err != nil { + t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err) + } + + shardNames = append(shardNames, shards...) + } + + // load the normal shards that we created + var files []zoekt.IndexFile + for _, shard := range shardNames { + f, err := os.Open(shard) + if err != nil { + t.Fatalf("opening shard file: %s", err) + } + defer f.Close() + + indexFile, err := zoekt.NewIndexFile(f) + if err != nil { + t.Fatalf("creating index file: %s", err) + } + defer indexFile.Close() + + files = append(files, indexFile) + } + + // merge all the simple shards into a compound shard + tmpName, dstName, err := zoekt.Merge(indexDir, files...) + if err != nil { + t.Fatalf("merging index files into compound shard: %s", err) + } + if err := os.Rename(tmpName, dstName); err != nil { + t.Fatal(err) + } + + return dstName +} + func TestCreateEmptyShard(t *testing.T) { dir := t.TempDir()