From 0ac3b041556b02724d06a4b213a0e73b355ebcda Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 14 Nov 2023 14:01:09 -0800 Subject: [PATCH 1/3] Pull out document creation into its own method --- gitindex/index.go | 107 +++++++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 48 deletions(-) diff --git a/gitindex/index.go b/gitindex/index.go index e98cb0125..b0e87a951 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -546,46 +546,13 @@ func indexGitRepo(opts Options, config gitIndexConfig) error { keys := fileKeys[name] for _, key := range keys { - brs := branchMap[key] - blob, err := repos[key].Repo.BlobObject(key.ID) + doc, err := createDocument(key, repos, branchMap, ranks, opts) if err != nil { return err } - keyFullPath := key.FullPath() - - if blob.Size > int64(opts.BuildOptions.SizeMax) && !opts.BuildOptions.IgnoreSizeMax(keyFullPath) { - if err := builder.Add(zoekt.Document{ - SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.BuildOptions.SizeMax), - Name: keyFullPath, - Branches: brs, - SubRepositoryPath: key.SubRepoPath, - }); err != nil { - return err - } - continue - } - - contents, err := blobContents(blob) - if err != nil { - return err - } - - var pathRanks []float64 - if len(ranks.Paths) > 0 { - // If the repository has ranking data, then store the file's rank. - pathRank := ranks.rank(keyFullPath) - pathRanks = []float64{pathRank} - } - - if err := builder.Add(zoekt.Document{ - SubRepositoryPath: key.SubRepoPath, - Name: keyFullPath, - Content: contents, - Branches: brs, - Ranks: pathRanks, - }); err != nil { - return fmt.Errorf("error adding document with name %s: %w", keyFullPath, err) + if err := builder.Add(doc); err != nil { + return fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err) } } } @@ -893,6 +860,62 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ return repos, branchMap, branchVersions, nil } +func uniq(ss []string) []string { + result := ss[:0] + var last string + for i, s := range ss { + if i == 0 || s != last { + result = append(result, s) + } + last = s + } + return result +} + +func createDocument( + key fileKey, + repos map[fileKey]BlobLocation, + branchMap map[fileKey][]string, + ranks repoPathRanks, + opts Options, +) (zoekt.Document, error) { + blob, err := repos[key].Repo.BlobObject(key.ID) + if err != nil { + return zoekt.Document{}, err + } + + keyFullPath := key.FullPath() + + if blob.Size > int64(opts.BuildOptions.SizeMax) && !opts.BuildOptions.IgnoreSizeMax(keyFullPath) { + return zoekt.Document{ + SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.BuildOptions.SizeMax), + Name: key.FullPath(), + Branches: branchMap[key], + SubRepositoryPath: key.SubRepoPath, + }, nil + } + + contents, err := blobContents(blob) + if err != nil { + return zoekt.Document{}, err + } + + var pathRanks []float64 + if len(ranks.Paths) > 0 { + // If the repository has ranking data, then store the file's rank. + pathRank := ranks.rank(keyFullPath) + pathRanks = []float64{pathRank} + } + + return zoekt.Document{ + SubRepositoryPath: key.SubRepoPath, + Name: keyFullPath, + Content: contents, + Branches: branchMap[key], + Ranks: pathRanks, + }, nil +} + func blobContents(blob *object.Blob) ([]byte, error) { r, err := blob.Reader() if err != nil { @@ -908,15 +931,3 @@ func blobContents(blob *object.Blob) ([]byte, error) { } return buf.Bytes(), nil } - -func uniq(ss []string) []string { - result := ss[:0] - var last string - for i, s := range ss { - if i == 0 || s != last { - result = append(result, s) - } - last = s - } - return result -} From 852058d3caf2a2a49289b1fa6af07bc3a9a3234a Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 14 Nov 2023 14:59:10 -0800 Subject: [PATCH 2/3] Avoid moving uniq function --- gitindex/index.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/gitindex/index.go b/gitindex/index.go index b0e87a951..c755736c7 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -860,18 +860,6 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ return repos, branchMap, branchVersions, nil } -func uniq(ss []string) []string { - result := ss[:0] - var last string - for i, s := range ss { - if i == 0 || s != last { - result = append(result, s) - } - last = s - } - return result -} - func createDocument( key fileKey, repos map[fileKey]BlobLocation, @@ -931,3 +919,15 @@ func blobContents(blob *object.Blob) ([]byte, error) { } return buf.Bytes(), nil } + +func uniq(ss []string) []string { + result := ss[:0] + var last string + for i, s := range ss { + if i == 0 || s != last { + result = append(result, s) + } + last = s + } + return result +} From 8562dda110ecd6b6ef83a32a734fd8d1713f664a Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 15 Nov 2023 07:47:56 -0800 Subject: [PATCH 3/3] Pass BuildOptions directly --- gitindex/index.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/gitindex/index.go b/gitindex/index.go index c755736c7..d3a5e3c64 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -546,7 +546,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) error { keys := fileKeys[name] for _, key := range keys { - doc, err := createDocument(key, repos, branchMap, ranks, opts) + doc, err := createDocument(key, repos, branchMap, ranks, opts.BuildOptions) if err != nil { return err } @@ -860,12 +860,11 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ return repos, branchMap, branchVersions, nil } -func createDocument( - key fileKey, +func createDocument(key fileKey, repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, ranks repoPathRanks, - opts Options, + opts build.Options, ) (zoekt.Document, error) { blob, err := repos[key].Repo.BlobObject(key.ID) if err != nil { @@ -873,10 +872,9 @@ func createDocument( } keyFullPath := key.FullPath() - - if blob.Size > int64(opts.BuildOptions.SizeMax) && !opts.BuildOptions.IgnoreSizeMax(keyFullPath) { + if blob.Size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(keyFullPath) { return zoekt.Document{ - SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.BuildOptions.SizeMax), + SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.SizeMax), Name: key.FullPath(), Branches: branchMap[key], SubRepositoryPath: key.SubRepoPath,