Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

build: faster newLinesIndices via bytes.IndexByte and buffer re-use #680

Merged
merged 2 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 42 additions & 11 deletions build/ctags.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language
monitor := newMonitor()
defer monitor.Stop()

var tagsToSections tagsToSections

for _, doc := range todo {
if doc.Symbols != nil {
continue
Expand Down Expand Up @@ -78,7 +80,7 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language
continue
}

symOffsets, symMetaData, err := tagsToSections(doc.Content, es)
symOffsets, symMetaData, err := tagsToSections.Convert(doc.Content, es)
if err != nil {
return fmt.Errorf("%s: %v", doc.Name, err)
}
Expand Down Expand Up @@ -109,11 +111,19 @@ func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int {
return i + 1
}

// tagsToSections converts ctags entries to byte ranges (zoekt.DocumentSection)
// with corresponding metadata (zoekt.Symbol).
func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
nls := newLinesIndices(content)
nls = append(nls, uint32(len(content)))
// tagsToSections holds buffers that are reused across calls to Convert. The
// reuse avoids allocating a fresh slice for every document and so reduces
// pressure on the garbage collector.
//
// The zero value is ready to use. A single value must not be used from
// multiple goroutines at once, because every call writes to the same buffer.
type tagsToSections struct {
// nlsBuf is the scratch slice that newLinesIndices fills with line-ending
// offsets. It is stored truncated to length zero, so only its capacity is
// carried over to the next call.
nlsBuf []uint32
}

// Convert ctags entries to byte ranges (zoekt.DocumentSection) with
// corresponding metadata (zoekt.Symbol).
//
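// Convert must not be called concurrently on the same tagsToSections value,
// because it reuses the receiver's internal buffer between calls.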
func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
nls := t.newLinesIndices(content)
symOffsets := make([]zoekt.DocumentSection, 0, len(tags))
symMetaData := make([]*zoekt.Symbol, 0, len(tags))

Expand Down Expand Up @@ -168,13 +178,34 @@ func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSectio
return symOffsets, symMetaData, nil
}

func newLinesIndices(in []byte) []uint32 {
out := make([]uint32, 0, len(in)/30)
for i, c := range in {
if c == '\n' {
out = append(out, uint32(i))
// newLinesIndices returns the byte offset of every '\n' in in, in ascending
// order, followed by one final entry equal to len(in).
//
// The final entry is always appended, including when in is empty or ends
// with '\n', so the result is never empty and its last element is always the
// document length.
//
// Offsets are stored as uint32. The returned slice shares its backing array
// with t.nlsBuf, so it is only valid until the next call on the same
// receiver.
func (t *tagsToSections) newLinesIndices(in []byte) []uint32 {
	// Reuse nlsBuf between calls to tagsToSections.Convert.
	out := t.nlsBuf
	if out == nil {
		// Rough guess of one newline per 30 bytes, plus room for the final
		// length entry.
		out = make([]uint32, 0, len(in)/30+1)
	}

	finalEntry := uint32(len(in))

	// off is the absolute offset in the original input of the first byte of
	// the remaining, not yet scanned, part of in.
	off := uint32(0)
	for {
		i := bytes.IndexByte(in, '\n')
		if i < 0 {
			break
		}

		off += uint32(i)
		out = append(out, off)

		// Continue scanning just past the newline that was found.
		in = in[i+1:]
		off++
	}

	// Always terminate with the document length so the last line has an end
	// offset even when the content does not end with '\n'.
	out = append(out, finalEntry)

	// Save the (possibly grown) buffer for reuse, truncated to length zero.
	t.nlsBuf = out[:0]

	return out
}

Expand Down
14 changes: 8 additions & 6 deletions build/ctags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func TestTagsToSections(t *testing.T) {
},
}

secs, _, err := tagsToSections(c, tags)
secs, _, err := (&tagsToSections{}).Convert(c, tags)
if err != nil {
t.Fatal("tagsToSections", err)
}
Expand All @@ -59,7 +59,7 @@ func TestTagsToSectionsMultiple(t *testing.T) {
},
}

got, _, err := tagsToSections(c, tags)
got, _, err := (&tagsToSections{}).Convert(c, tags)
if err != nil {
t.Fatal("tagsToSections", err)
}
Expand Down Expand Up @@ -92,7 +92,7 @@ func TestTagsToSectionsReverse(t *testing.T) {
},
}

got, _, err := tagsToSections(c, tags)
got, _, err := (&tagsToSections{}).Convert(c, tags)
if err != nil {
t.Fatal("tagsToSections", err)
}
Expand All @@ -118,7 +118,7 @@ func TestTagsToSectionsEOF(t *testing.T) {
},
}

secs, _, err := tagsToSections(c, tags)
secs, _, err := (&tagsToSections{}).Convert(c, tags)
if err != nil {
t.Fatal("tagsToSections", err)
}
Expand Down Expand Up @@ -242,12 +242,14 @@ func BenchmarkTagsToSections(b *testing.B) {
b.Fatal(err)
}

var tagsToSections tagsToSections

entries, err := parser.Parse("./testdata/large_file.cc", file)
if err != nil {
b.Fatal(err)
}

secs, _, err := tagsToSections(file, entries)
secs, _, err := tagsToSections.Convert(file, entries)
if err != nil {
b.Fatal(err)
}
Expand All @@ -260,7 +262,7 @@ func BenchmarkTagsToSections(b *testing.B) {
b.ReportAllocs()

for n := 0; n < b.N; n++ {
_, _, err := tagsToSections(file, entries)
_, _, err := tagsToSections.Convert(file, entries)
if err != nil {
b.Fatal(err)
}
Expand Down
Loading