From 3109882bd0115b74ace1cfe7db1d33f6abc7fa0d Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 15 Nov 2023 08:00:37 -0800 Subject: [PATCH] Pull out document creation into its own method (#693) I experimented with some changes to encourage `go-git` to use less memory. They didn't pan out, but this intermediate refactor felt useful on its own. It helps break up the super long `indexGitRepo` method. --- gitindex/index.go | 81 ++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/gitindex/index.go b/gitindex/index.go index e98cb012..d3a5e3c6 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -546,46 +546,13 @@ func indexGitRepo(opts Options, config gitIndexConfig) error { keys := fileKeys[name] for _, key := range keys { - brs := branchMap[key] - blob, err := repos[key].Repo.BlobObject(key.ID) + doc, err := createDocument(key, repos, branchMap, ranks, opts.BuildOptions) if err != nil { return err } - keyFullPath := key.FullPath() - - if blob.Size > int64(opts.BuildOptions.SizeMax) && !opts.BuildOptions.IgnoreSizeMax(keyFullPath) { - if err := builder.Add(zoekt.Document{ - SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.BuildOptions.SizeMax), - Name: keyFullPath, - Branches: brs, - SubRepositoryPath: key.SubRepoPath, - }); err != nil { - return err - } - continue - } - - contents, err := blobContents(blob) - if err != nil { - return err - } - - var pathRanks []float64 - if len(ranks.Paths) > 0 { - // If the repository has ranking data, then store the file's rank. - pathRank := ranks.rank(keyFullPath) - pathRanks = []float64{pathRank} - } - - if err := builder.Add(zoekt.Document{ - SubRepositoryPath: key.SubRepoPath, - Name: keyFullPath, - Content: contents, - Branches: brs, - Ranks: pathRanks, - }); err != nil { - return fmt.Errorf("error adding document with name %s: %w", keyFullPath, err) + if err := builder.Add(doc); err != nil { + return fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err) } } } @@ -893,6 +860,48 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ return repos, branchMap, branchVersions, nil } +func createDocument(key fileKey, + repos map[fileKey]BlobLocation, + branchMap map[fileKey][]string, + ranks repoPathRanks, + opts build.Options, +) (zoekt.Document, error) { + blob, err := repos[key].Repo.BlobObject(key.ID) + if err != nil { + return zoekt.Document{}, err + } + + keyFullPath := key.FullPath() + if blob.Size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(keyFullPath) { + return zoekt.Document{ + SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.SizeMax), + Name: key.FullPath(), + Branches: branchMap[key], + SubRepositoryPath: key.SubRepoPath, + }, nil + } + + contents, err := blobContents(blob) + if err != nil { + return zoekt.Document{}, err + } + + var pathRanks []float64 + if len(ranks.Paths) > 0 { + // If the repository has ranking data, then store the file's rank. + pathRank := ranks.rank(keyFullPath) + pathRanks = []float64{pathRank} + } + + return zoekt.Document{ + SubRepositoryPath: key.SubRepoPath, + Name: keyFullPath, + Content: contents, + Branches: branchMap[key], + Ranks: pathRanks, + }, nil +} + func blobContents(blob *object.Blob) ([]byte, error) { r, err := blob.Reader() if err != nil {