Skip to content

Commit

Permalink
Pull out document creation into its own method (#693)
Browse files Browse the repository at this point in the history
I experimented with some changes to encourage `go-git` to use less memory. They
didn't pan out, but this intermediate refactor felt useful on its own. It helps
break up the super long `indexGitRepo` method.
  • Loading branch information
jtibshirani authored Nov 15, 2023
1 parent 137eb8f commit 3109882
Showing 1 changed file with 45 additions and 36 deletions.
81 changes: 45 additions & 36 deletions gitindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,46 +546,13 @@ func indexGitRepo(opts Options, config gitIndexConfig) error {
keys := fileKeys[name]

for _, key := range keys {
brs := branchMap[key]
blob, err := repos[key].Repo.BlobObject(key.ID)
doc, err := createDocument(key, repos, branchMap, ranks, opts.BuildOptions)
if err != nil {
return err
}

keyFullPath := key.FullPath()

if blob.Size > int64(opts.BuildOptions.SizeMax) && !opts.BuildOptions.IgnoreSizeMax(keyFullPath) {
if err := builder.Add(zoekt.Document{
SkipReason: fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.BuildOptions.SizeMax),
Name: keyFullPath,
Branches: brs,
SubRepositoryPath: key.SubRepoPath,
}); err != nil {
return err
}
continue
}

contents, err := blobContents(blob)
if err != nil {
return err
}

var pathRanks []float64
if len(ranks.Paths) > 0 {
// If the repository has ranking data, then store the file's rank.
pathRank := ranks.rank(keyFullPath)
pathRanks = []float64{pathRank}
}

if err := builder.Add(zoekt.Document{
SubRepositoryPath: key.SubRepoPath,
Name: keyFullPath,
Content: contents,
Branches: brs,
Ranks: pathRanks,
}); err != nil {
return fmt.Errorf("error adding document with name %s: %w", keyFullPath, err)
if err := builder.Add(doc); err != nil {
return fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err)
}
}
}
Expand Down Expand Up @@ -893,6 +860,48 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[
return repos, branchMap, branchVersions, nil
}

// createDocument builds the zoekt.Document for the blob identified by key.
//
// The blob is looked up via repos[key].Repo. When the blob's size exceeds
// opts.SizeMax and opts.IgnoreSizeMax does not exempt its path, the returned
// document has SkipReason set and carries no content. Otherwise the blob is
// read in full and, if ranks.Paths is non-empty, the rank for the file's path
// is attached. Errors from the blob lookup or from reading its contents are
// returned unchanged alongside a zero Document.
func createDocument(key fileKey,
	repos map[fileKey]BlobLocation,
	branchMap map[fileKey][]string,
	ranks repoPathRanks,
	opts build.Options,
) (zoekt.Document, error) {
	blob, err := repos[key].Repo.BlobObject(key.ID)
	if err != nil {
		return zoekt.Document{}, err
	}

	// Fields shared by both the skipped and the fully indexed document.
	name := key.FullPath()
	doc := zoekt.Document{
		SubRepositoryPath: key.SubRepoPath,
		Name:              name,
		Branches:          branchMap[key],
	}

	// Oversized files are recorded with a skip reason instead of content,
	// unless the build options exempt this path from the size limit.
	if blob.Size > int64(opts.SizeMax) {
		if !opts.IgnoreSizeMax(name) {
			doc.SkipReason = fmt.Sprintf("file size %d exceeds maximum size %d", blob.Size, opts.SizeMax)
			return doc, nil
		}
	}

	data, err := blobContents(blob)
	if err != nil {
		return zoekt.Document{}, err
	}
	doc.Content = data

	// If the repository has ranking data, then store the file's rank.
	if len(ranks.Paths) > 0 {
		doc.Ranks = []float64{ranks.rank(name)}
	}

	return doc, nil
}

func blobContents(blob *object.Blob) ([]byte, error) {
r, err := blob.Reader()
if err != nil {
Expand Down

0 comments on commit 3109882

Please sign in to comment.