-
Notifications
You must be signed in to change notification settings - Fork 96
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Avoid reopening packfile on every object access (#852)
By default, the `go-git` library will open the packfile on every call to `Repository.BlobObject`, then close it. During indexing, we collect the list of files to index, then iterate through each one calling `Repository.BlobObject`. So on every object access the packfile is reopened, and `go-git` reallocates some in-memory buffers. This PR bypasses `git.PlainOpen` to allow us to enable the `KeepDescriptors` option. This option keeps packfile files open, and caches wrappers for them. The files then need to be explicitly closed when done with the repo. Benefits: * Avoid reallocating the memory buffers on every object access (see benchmark results below) * (Highly speculative) I suspect this could improve OS decisions around when to cache portions of the packfile. Maybe constantly reopening and seeking within the file makes it harder for the OS to determine the true access pattern, which is roughly random access. This can affect decisions like readahead and whether to consider pages 'active'.
- Loading branch information
1 parent
7caa174
commit 6a4b615
Showing
2 changed files
with
129 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ package gitindex | |
import ( | ||
"bytes" | ||
"context" | ||
"errors" | ||
"fmt" | ||
"net/url" | ||
"os" | ||
|
@@ -64,6 +65,83 @@ func TestIndexEmptyRepo(t *testing.T) { | |
} | ||
} | ||
|
||
func TestIndexNonexistentRepo(t *testing.T) { | ||
dir := t.TempDir() | ||
desc := zoekt.Repository{ | ||
Name: "nonexistent", | ||
} | ||
opts := Options{ | ||
RepoDir: "does/not/exist", | ||
Branches: []string{"main"}, | ||
BuildOptions: build.Options{ | ||
RepositoryDescription: desc, | ||
IndexDir: dir, | ||
}, | ||
} | ||
|
||
if _, err := IndexGitRepo(opts); err == nil { | ||
t.Fatal("expected error, got none") | ||
} else if !errors.Is(err, git.ErrRepositoryNotExists) { | ||
t.Fatalf("expected git.ErrRepositoryNotExists, got %v", err) | ||
} | ||
} | ||
|
||
func TestIndexTinyRepo(t *testing.T) { | ||
// Create a repo with one file in it. | ||
dir := t.TempDir() | ||
executeCommand(t, dir, exec.Command("git", "init", "-b", "main", "repo")) | ||
|
||
repoDir := filepath.Join(dir, "repo") | ||
executeCommand(t, repoDir, exec.Command("git", "config", "user.name", "Thomas")) | ||
executeCommand(t, repoDir, exec.Command("git", "config", "user.email", "[email protected]")) | ||
|
||
if err := os.WriteFile(filepath.Join(repoDir, "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil { | ||
t.Fatalf("WriteFile: %v", err) | ||
} | ||
executeCommand(t, repoDir, exec.Command("git", "add", ".")) | ||
executeCommand(t, repoDir, exec.Command("git", "commit", "-m", "initial commit")) | ||
|
||
// Test that indexing accepts both the repo directory, and the .git subdirectory. | ||
for _, testDir := range []string{"repo", "repo/.git"} { | ||
opts := Options{ | ||
RepoDir: filepath.Join(dir, testDir), | ||
Branches: []string{"main"}, | ||
BuildOptions: build.Options{ | ||
RepositoryDescription: zoekt.Repository{Name: "repo"}, | ||
IndexDir: dir, | ||
}, | ||
} | ||
|
||
if _, err := IndexGitRepo(opts); err != nil { | ||
t.Fatalf("unexpected error %v", err) | ||
} | ||
|
||
searcher, err := shards.NewDirectorySearcher(dir) | ||
if err != nil { | ||
t.Fatal("NewDirectorySearcher", err) | ||
} | ||
|
||
results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{}) | ||
searcher.Close() | ||
|
||
if err != nil { | ||
t.Fatal("search failed", err) | ||
} | ||
|
||
if len(results.Files) != 1 { | ||
t.Fatalf("got search result %v, want 1 file", results.Files) | ||
} | ||
} | ||
} | ||
|
||
// executeCommand runs cmd with its working directory set to dir and fails the
// test immediately if the command does not complete successfully.
//
// When the caller has not set cmd.Stdout or cmd.Stderr, the combined output
// is captured and included in the failure message, so that a failing command
// reports its own diagnostics rather than just an exit status. Writers the
// caller has set are left untouched.
//
// It returns cmd so that callers can inspect it after the run.
func executeCommand(t *testing.T, dir string, cmd *exec.Cmd) *exec.Cmd {
	// Attribute failures to the calling test line, not to this helper.
	t.Helper()

	cmd.Dir = dir

	// Capture output only when neither stream is already wired up; this keeps
	// the helper compatible with callers that supply their own writers.
	var output bytes.Buffer
	if cmd.Stdout == nil && cmd.Stderr == nil {
		cmd.Stdout = &output
		cmd.Stderr = &output
	}

	if err := cmd.Run(); err != nil {
		t.Fatalf("cmd.Run %q: %v\noutput:\n%s", cmd.Args, err, output.String())
	}
	return cmd
}
|
||
func TestIndexDeltaBasic(t *testing.T) { | ||
type branchToDocumentMap map[string][]zoekt.Document | ||
|
||
|