From 282a251a1d2d8d2293e8b0cce12b5b3f208ab12b Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Mon, 30 Sep 2024 08:44:10 -0700 Subject: [PATCH] Remove branchMap in prepareNormalBuild (#838) This PR refactors the "build preparation" code to remove `branchMap`, in favor of storing everything in a single map. The main motivation is just to make the code clearer. Specific changes: * Store branches in the same map as blob locations, remove `branchMap` * Introduce new struct `BlobIndexInfo` to hold the old location information, plus branches * Rename `BlobLocation` -> `BlobRepo` --- cmd/zoekt-repo-index/main.go | 12 +-- gitindex/index.go | 154 ++++++++++++++++++----------------- gitindex/index_test.go | 45 +++++++++- gitindex/tree.go | 36 ++++---- 4 files changed, 149 insertions(+), 98 deletions(-) diff --git a/cmd/zoekt-repo-index/main.go b/cmd/zoekt-repo-index/main.go index d5b04d5e4..1ca2d7fa7 100644 --- a/cmd/zoekt-repo-index/main.go +++ b/cmd/zoekt-repo-index/main.go @@ -180,7 +180,7 @@ func main() { } } - perBranch := map[string]map[fileKey]gitindex.BlobLocation{} + perBranch := map[string]map[fileKey]gitindex.BlobRepo{} opts.SubRepositories = map[string]*zoekt.Repository{} // branch => repo => version @@ -193,7 +193,7 @@ func main() { } perBranch[br.branch] = files - for key, loc := range files { + for key, repo := range files { _, ok := opts.SubRepositories[key.SubRepoPath] if ok { // This can be incorrect: if the layout of manifests @@ -204,8 +204,8 @@ func main() { } desc := &zoekt.Repository{} - if err := gitindex.SetTemplatesFromOrigin(desc, loc.URL); err != nil { - log.Fatalf("SetTemplatesFromOrigin(%s): %v", loc.URL, err) + if err := gitindex.SetTemplatesFromOrigin(desc, repo.URL); err != nil { + log.Fatalf("SetTemplatesFromOrigin(%s): %v", repo.URL, err) } opts.SubRepositories[key.SubRepoPath] = desc @@ -325,8 +325,8 @@ func getManifest(repo *git.Repository, branch, path string) (*manifest.Manifest, func iterateManifest(mf *manifest.Manifest, baseURL url.URL, revPrefix string, cache *gitindex.RepoCache, -) (map[fileKey]gitindex.BlobLocation, map[string]plumbing.Hash, error) { - allFiles := map[fileKey]gitindex.BlobLocation{} +) (map[fileKey]gitindex.BlobRepo, map[string]plumbing.Hash, error) { + allFiles := map[fileKey]gitindex.BlobRepo{} allVersions := map[string]plumbing.Hash{} for _, p := range mf.Project { rev := mf.ProjectRevision(&p) diff --git a/gitindex/index.go b/gitindex/index.go index a7ecd0072..3eae74d7a 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -426,10 +426,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { } // branch => (path, sha1) => repo. - var repos map[fileKey]BlobLocation - - // fileKey => branches - var branchMap map[fileKey][]string + var repos map[fileKey]BlobIndexInfo // Branch => Repo => SHA1 var branchVersions map[string]map[string]plumbing.Hash @@ -441,7 +438,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { var changedOrRemovedFiles []string if opts.BuildOptions.IsDelta { - repos, branchMap, branchVersions, changedOrRemovedFiles, err = prepareDeltaBuild(opts, repo) + repos, branchVersions, changedOrRemovedFiles, err = prepareDeltaBuild(opts, repo) if err != nil { log.Printf("delta build: falling back to normal build since delta build failed, repository=%q, err=%s", opts.BuildOptions.RepositoryDescription.Name, err) opts.BuildOptions.IsDelta = false @@ -449,24 +446,24 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { } if !opts.BuildOptions.IsDelta { - repos, branchMap, branchVersions, err = prepareNormalBuild(opts, repo) + repos, branchVersions, err = prepareNormalBuild(opts, repo) if err != nil { return false, fmt.Errorf("preparing normal build: %w", err) } } - reposByPath := map[string]BlobLocation{} - for key, location := range repos { - reposByPath[key.SubRepoPath] = location + reposByPath := map[string]BlobIndexInfo{} + for key, info := range repos { + reposByPath[key.SubRepoPath] = info } opts.BuildOptions.SubRepositories = map[string]*zoekt.Repository{} - for path, location := range reposByPath { + for path, info := range reposByPath { tpl := opts.BuildOptions.RepositoryDescription if path != "" { - tpl = zoekt.Repository{URL: location.URL.String()} - if err := SetTemplatesFromOrigin(&tpl, location.URL); err != nil { - log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, location.URL, err) + tpl = zoekt.Repository{URL: info.Repo.URL.String()} + if err := SetTemplatesFromOrigin(&tpl, info.Repo.URL); err != nil { + log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.Repo.URL, err) } } opts.BuildOptions.SubRepositories[path] = &tpl @@ -542,7 +539,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { keys := fileKeys[name] for _, key := range keys { - doc, err := createDocument(key, repos, branchMap, ranks, opts.BuildOptions) + doc, err := createDocument(key, repos, ranks, opts.BuildOptions) if err != nil { return false, err } @@ -595,11 +592,11 @@ func newIgnoreMatcher(tree *object.Tree) (*ignore.Matcher, error) { // prepareDeltaBuildFunc is a function that calculates the necessary metadata for preparing // a build.Builder instance for generating a delta build. -type prepareDeltaBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) +type prepareDeltaBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) // prepareNormalBuildFunc is a function that calculates the necessary metadata for preparing // a build.Builder instance for generating a normal build. -type prepareNormalBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error) +type prepareNormalBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, err error) type gitIndexConfig struct { // prepareDeltaBuild, if not nil, is the function that is used to calculate the metadata that will be used to @@ -615,19 +612,19 @@ type gitIndexConfig struct { prepareNormalBuild prepareNormalBuildFunc } -func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) { +func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) { if options.Submodules { - return nil, nil, nil, nil, fmt.Errorf("delta builds currently don't support submodule indexing") + return nil, nil, nil, fmt.Errorf("delta builds currently don't support submodule indexing") } // discover what commits we indexed during our last build existingRepository, _, ok, err := options.BuildOptions.FindRepositoryMetadata() if err != nil { - return nil, nil, nil, nil, fmt.Errorf("failed to get repository metadata: %w", err) + return nil, nil, nil, fmt.Errorf("failed to get repository metadata: %w", err) } if !ok { - return nil, nil, nil, nil, fmt.Errorf("no existing shards found for repository") + return nil, nil, nil, fmt.Errorf("no existing shards found for repository") } if options.DeltaShardNumberFallbackThreshold > 0 { @@ -641,7 +638,7 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f oldShards := options.BuildOptions.FindAllShards() if uint64(len(oldShards)) > options.DeltaShardNumberFallbackThreshold { - return nil, nil, nil, nil, fmt.Errorf("number of existing shards (%d) > requested shard threshold (%d)", len(oldShards), options.DeltaShardNumberFallbackThreshold) + return nil, nil, nil, fmt.Errorf("number of existing shards (%d) > requested shard threshold (%d)", len(oldShards), options.DeltaShardNumberFallbackThreshold) } } @@ -663,20 +660,17 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f existingBranchList := strings.Join(existingBranchNames, ", ") optionsBranchList := strings.Join(optionsBranchNames, ", ") - return nil, nil, nil, nil, fmt.Errorf("requested branch set in build options (%q) != branch set found on disk (%q) - branch set must be the same for delta shards", optionsBranchList, existingBranchList) + return nil, nil, nil, fmt.Errorf("requested branch set in build options (%q) != branch set found on disk (%q) - branch set must be the same for delta shards", optionsBranchList, existingBranchList) } // Check if the build options hash does not match the repository metadata's hash // If it does not match then one or more index options has changed and will require a normal build instead of a delta build if options.BuildOptions.GetHash() != existingRepository.IndexOptions { - return nil, nil, nil, nil, fmt.Errorf("one or more index options previously stored for repository %s (ID: %d) does not match the index options for this requested build; These index option updates are incompatible with delta build. new index options: %+v", existingRepository.Name, existingRepository.ID, options.BuildOptions.HashOptions()) + return nil, nil, nil, fmt.Errorf("one or more index options previously stored for repository %s (ID: %d) does not match the index options for this requested build; These index option updates are incompatible with delta build. new index options: %+v", existingRepository.Name, existingRepository.ID, options.BuildOptions.HashOptions()) } // branch => (path, sha1) => repo. - repos = map[fileKey]BlobLocation{} - - // fileKey => branches - branchMap = map[fileKey][]string{} + repos = map[fileKey]BlobIndexInfo{} // branch name -> git worktree at most current commit branchToCurrentTree := make(map[string]*object.Tree, len(options.Branches)) @@ -684,12 +678,12 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f for _, b := range options.Branches { commit, err := getCommit(repository, options.BranchPrefix, b) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("getting last current commit for branch %q: %w", b, err) + return nil, nil, nil, fmt.Errorf("getting last current commit for branch %q: %w", b, err) } tree, err := commit.Tree() if err != nil { - return nil, nil, nil, nil, fmt.Errorf("getting current git tree for branch %q: %w", b, err) + return nil, nil, nil, fmt.Errorf("getting current git tree for branch %q: %w", b, err) } branchToCurrentTree[b] = tree @@ -698,39 +692,38 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f rawURL := options.BuildOptions.RepositoryDescription.URL u, err := url.Parse(rawURL) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("parsing repository URL %q: %w", rawURL, err) + return nil, nil, nil, fmt.Errorf("parsing repository URL %q: %w", rawURL, err) } // TODO: Support repository submodules for delta builds // For this prototype, we are ignoring repository submodules, which means that we can use the same // blob location for all files - hackSharedBlobLocation := BlobLocation{ - Repo: repository, - URL: u, + hackSharedBlobLocation := BlobRepo{ + GitRepo: repository, + URL: u, } - // loop over all branches, calculate the diff between our // last indexed commit and the current commit, and add files mentioned in the diff for _, branch := range existingRepository.Branches { lastIndexedCommit, err := getCommit(repository, "", branch.Version) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("getting last indexed commit for branch %q: %w", branch.Name, err) + return nil, nil, nil, fmt.Errorf("getting last indexed commit for branch %q: %w", branch.Name, err) } lastIndexedTree, err := lastIndexedCommit.Tree() if err != nil { - return nil, nil, nil, nil, fmt.Errorf("getting lasted indexed git tree for branch %q: %w", branch.Name, err) + return nil, nil, nil, fmt.Errorf("getting lasted indexed git tree for branch %q: %w", branch.Name, err) } changes, err := object.DiffTreeWithOptions(context.Background(), lastIndexedTree, branchToCurrentTree[branch.Name], &object.DiffTreeOptions{DetectRenames: false}) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("generating changeset for branch %q: %w", branch.Name, err) + return nil, nil, nil, fmt.Errorf("generating changeset for branch %q: %w", branch.Name, err) } for i, c := range changes { oldFile, newFile, err := c.Files() if err != nil { - return nil, nil, nil, nil, fmt.Errorf("change #%d: getting files before and after change: %w", i, err) + return nil, nil, nil, fmt.Errorf("change #%d: getting files before and after change: %w", i, err) } if newFile != nil { @@ -740,13 +733,20 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f // TODO@ggilmore: HACK - remove once ignore files are supported in delta builds if newFileRelativeRootPath == ignore.IgnoreFile { - return nil, nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile) + return nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile) } // either file is added or renamed, so we need to add the new version to the build file := fileKey{Path: newFileRelativeRootPath, ID: newFile.Hash} - repos[file] = hackSharedBlobLocation - branchMap[file] = append(branchMap[file], branch.Name) + if existing, ok := repos[file]; ok { + existing.Branches = append(existing.Branches, branch.Name) + repos[file] = existing + } else { + repos[file] = BlobIndexInfo{ + Repo: hackSharedBlobLocation, + Branches: []string{branch.Name}, + } + } } if oldFile == nil { @@ -759,7 +759,7 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f oldFileRelativeRootPath := c.From.Name if oldFileRelativeRootPath == ignore.IgnoreFile { - return nil, nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile) + return nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile) } // The file is either modified or deleted. So, we need to add ALL versions @@ -772,12 +772,19 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f continue } - return nil, nil, nil, nil, fmt.Errorf("getting hash for file %q in branch %q: %w", oldFile.Name, b, err) + return nil, nil, nil, fmt.Errorf("getting hash for file %q in branch %q: %w", oldFile.Name, b, err) } file := fileKey{Path: oldFileRelativeRootPath, ID: f.ID()} - repos[file] = hackSharedBlobLocation - branchMap[file] = append(branchMap[file], b) + if existing, ok := repos[file]; ok { + existing.Branches = append(existing.Branches, b) + repos[file] = existing + } else { + repos[file] = BlobIndexInfo{ + Repo: hackSharedBlobLocation, + Branches: []string{b}, + } + } } changedOrDeletedPaths = append(changedOrDeletedPaths, oldFileRelativeRootPath) @@ -786,39 +793,34 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f // we need to de-duplicate the branch map before returning it - it's possible for the same // branch to have been added multiple times if a file has been modified across multiple commits - - for file, branches := range branchMap { - sort.Strings(branches) - branchMap[file] = uniq(branches) + for _, info := range repos { + sort.Strings(info.Branches) + info.Branches = uniq(info.Branches) } // we also need to de-duplicate the list of changed or deleted file paths, it's also possible to have duplicates // for the same reasoning as above - sort.Strings(changedOrDeletedPaths) changedOrDeletedPaths = uniq(changedOrDeletedPaths) - return repos, branchMap, nil, changedOrDeletedPaths, nil + return repos, nil, changedOrDeletedPaths, nil } -func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error) { +func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, err error) { var repoCache *RepoCache if options.Submodules { repoCache = NewRepoCache(options.RepoCacheDir) } - // branch => (path, sha1) => repo. - repos = map[fileKey]BlobLocation{} - - // fileKey => branches - branchMap = map[fileKey][]string{} + // branch => (path, sha1) => metadata. + repos = map[fileKey]BlobIndexInfo{} // Branch => Repo => SHA1 branchVersions = map[string]map[string]plumbing.Hash{} branches, err := expandBranches(repository, options.Branches, options.BranchPrefix) if err != nil { - return nil, nil, nil, fmt.Errorf("expandBranches: %w", err) + return nil, nil, fmt.Errorf("expandBranches: %w", err) } for _, b := range branches { @@ -828,48 +830,54 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ continue } - return nil, nil, nil, fmt.Errorf("getCommit: %w", err) + return nil, nil, fmt.Errorf("getCommit: %w", err) } tree, err := commit.Tree() if err != nil { - return nil, nil, nil, fmt.Errorf("commit.Tree: %w", err) + return nil, nil, fmt.Errorf("commit.Tree: %w", err) } ig, err := newIgnoreMatcher(tree) if err != nil { - return nil, nil, nil, fmt.Errorf("newIgnoreMatcher: %w", err) + return nil, nil, fmt.Errorf("newIgnoreMatcher: %w", err) } files, subVersions, err := TreeToFiles(repository, tree, options.BuildOptions.RepositoryDescription.URL, repoCache) if err != nil { - return nil, nil, nil, fmt.Errorf("TreeToFiles: %w", err) + return nil, nil, fmt.Errorf("TreeToFiles: %w", err) } for k, v := range files { if ig.Match(k.Path) { continue } - repos[k] = v - branchMap[k] = append(branchMap[k], b) + + if existing, ok := repos[k]; ok { + existing.Branches = append(existing.Branches, b) + repos[k] = existing + } else { + repos[k] = BlobIndexInfo{Repo: v, Branches: []string{b}} + } } branchVersions[b] = subVersions } - return repos, branchMap, branchVersions, nil + return repos, branchVersions, nil } func createDocument(key fileKey, - repos map[fileKey]BlobLocation, - branchMap map[fileKey][]string, + repos map[fileKey]BlobIndexInfo, ranks repoPathRanks, opts build.Options, ) (zoekt.Document, error) { - blob, err := repos[key].Repo.BlobObject(key.ID) + repo := repos[key].Repo + blob, err := repo.GitRepo.BlobObject(key.ID) + branches := repos[key].Branches // We filter out large documents when fetching the repo. So if an object is too large, it will not be found. if errors.Is(err, plumbing.ErrObjectNotFound) { - return skippedLargeDoc(key, branchMap, opts), nil + return skippedLargeDoc(key, branches, opts), nil } if err != nil { @@ -878,7 +886,7 @@ func createDocument(key fileKey, keyFullPath := key.FullPath() if blob.Size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(keyFullPath) { - return skippedLargeDoc(key, branchMap, opts), nil + return skippedLargeDoc(key, branches, opts), nil } contents, err := blobContents(blob) @@ -897,16 +905,16 @@ func createDocument(key fileKey, SubRepositoryPath: key.SubRepoPath, Name: keyFullPath, Content: contents, - Branches: branchMap[key], + Branches: branches, Ranks: pathRanks, }, nil } -func skippedLargeDoc(key fileKey, branchMap map[fileKey][]string, opts build.Options) zoekt.Document { +func skippedLargeDoc(key fileKey, branches []string, opts build.Options) zoekt.Document { return zoekt.Document{ SkipReason: fmt.Sprintf("file size exceeds maximum size %d", opts.SizeMax), Name: key.FullPath(), - Branches: branchMap[key], + Branches: branches, SubRepositoryPath: key.SubRepoPath, } } diff --git a/gitindex/index_test.go b/gitindex/index_test.go index 0aca04145..fff8d8ab9 100644 --- a/gitindex/index_test.go +++ b/gitindex/index_test.go @@ -21,6 +21,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "sort" "testing" @@ -607,13 +608,13 @@ func TestIndexDeltaBasic(t *testing.T) { // setup: prepare spy versions of prepare delta / normal build so that we can observe // whether they were called appropriately deltaBuildCalled := false - prepareDeltaSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) { + prepareDeltaSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) { deltaBuildCalled = true return prepareDeltaBuild(options, repository) } normalBuildCalled := false - prepareNormalSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error) { + prepareNormalSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, err error) { normalBuildCalled = true return prepareNormalBuild(options, repository) } @@ -784,3 +785,43 @@ func TestSetTemplate(t *testing.T) { t.Errorf("got %q, want %q", got, want) } } + +func BenchmarkPrepareNormalBuild(b *testing.B) { + // NOTE: To run the benchmark, download a large repo (like github.com/chromium/chromium/) and change this to its path. + repoDir := "/path/to/your/repo" + repo, err := git.PlainOpen(repoDir) + if err != nil { + b.Fatalf("Failed to open test repository: %v", err) + } + + opts := Options{ + RepoDir: repoDir, + Submodules: false, + BranchPrefix: "refs/heads/", + Branches: []string{"main"}, + BuildOptions: build.Options{ + RepositoryDescription: zoekt.Repository{ + Name: "test-repo", + URL: "https://github.com/example/test-repo", + }, + }, + } + + b.ReportAllocs() + + repos, branchVersions, err := prepareNormalBuild(opts, repo) + if err != nil { + b.Fatalf("prepareNormalBuild failed: %v", err) + } + + runtime.GC() + + var m runtime.MemStats + runtime.ReadMemStats(&m) + b.ReportMetric(float64(m.HeapInuse), "heap-used-bytes") + b.ReportMetric(float64(m.HeapInuse), "heap-allocated-bytes") + + if len(repos) == 0 || len(branchVersions) == 0 { + b.Fatalf("Unexpected empty results") + } +} diff --git a/gitindex/tree.go b/gitindex/tree.go index 98f9d2092..31b480cb2 100644 --- a/gitindex/tree.go +++ b/gitindex/tree.go @@ -35,7 +35,7 @@ type repoWalker struct { repo *git.Repository repoURL *url.URL - tree map[fileKey]BlobLocation + tree map[fileKey]BlobRepo // Path => SubmoduleEntry submodules map[string]*SubmoduleEntry @@ -65,7 +65,7 @@ func newRepoWalker(r *git.Repository, repoURL string, repoCache *RepoCache) *rep return &repoWalker{ repo: r, repoURL: u, - tree: map[fileKey]BlobLocation{}, + tree: map[fileKey]BlobRepo{}, repoCache: repoCache, subRepoVersions: map[string]plumbing.Hash{}, } @@ -97,9 +97,7 @@ func (rw *repoWalker) parseModuleMap(t *object.Tree) error { // TreeToFiles fetches the blob SHA1s for a tree. If repoCache is // non-nil, recurse into submodules. In addition, it returns a mapping // that indicates in which repo each SHA1 can be found. -func TreeToFiles(r *git.Repository, t *object.Tree, - repoURL string, repoCache *RepoCache, -) (map[fileKey]BlobLocation, map[string]plumbing.Hash, error) { +func TreeToFiles(r *git.Repository, t *object.Tree, repoURL string, repoCache *RepoCache) (map[fileKey]BlobRepo, map[string]plumbing.Hash, error) { rw := newRepoWalker(r, repoURL, repoCache) if err := rw.parseModuleMap(t); err != nil { @@ -184,12 +182,9 @@ func (r *repoWalker) handleEntry(p string, e *object.TreeEntry) error { return nil } - r.tree[fileKey{ - Path: p, - ID: e.Hash, - }] = BlobLocation{ - Repo: r.repo, - URL: r.repoURL, + r.tree[fileKey{Path: p, ID: e.Hash}] = BlobRepo{ + GitRepo: r.repo, + URL: r.repoURL, } return nil } @@ -206,14 +201,21 @@ func (k *fileKey) FullPath() string { return filepath.Join(k.SubRepoPath, k.Path) } -// BlobLocation holds data where a blob can be found. -type BlobLocation struct { - Repo *git.Repository - URL *url.URL +// BlobIndexInfo contains information about the blob that's needed for indexing. +type BlobIndexInfo struct { + Repo BlobRepo + // Branches is the list of branches that contain the blob. + Branches []string } -func (l *BlobLocation) Blob(id *plumbing.Hash) ([]byte, error) { - blob, err := l.Repo.BlobObject(*id) +// BlobRepo holds the repo where the blob can be found. +type BlobRepo struct { + GitRepo *git.Repository + URL *url.URL +} + +func (l *BlobRepo) Blob(id *plumbing.Hash) ([]byte, error) { + blob, err := l.GitRepo.BlobObject(*id) if err != nil { return nil, err }