From 1bbd4f83d5ce2ad37b484d33fbdcd933d40481d4 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith
Date: Mon, 6 Nov 2023 11:26:30 +0200
Subject: [PATCH 1/2] build: use bytes.IndexByte for faster newLinesIndices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On my machine this reduces the wall clock time of BenchmarkTagsToSections
by 38%. This is faster because bytes.IndexByte relies on CPU-specific
optimizations to find the next newline (e.g. it uses AVX2 if available).

goos: linux
goarch: amd64
pkg: github.com/sourcegraph/zoekt/build
cpu: AMD Ryzen 9 5950X 16-Core Processor

old time/op    new time/op    delta
188µs ± 7%     117µs ± 4%     -37.96%  (p=0.000 n=10+10)

old alloc/op   new alloc/op   delta
79.3kB ± 0%    79.3kB ± 0%    ~        (all equal)

old allocs/op  new allocs/op  delta
443 ± 0%       443 ± 0%       ~        (all equal)

Test Plan: go test -bench BenchmarkTagsToSections
---
 build/ctags.go | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/build/ctags.go b/build/ctags.go
index ae94d6278..ee33f18e1 100644
--- a/build/ctags.go
+++ b/build/ctags.go
@@ -169,11 +169,19 @@ func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSectio
 }
 
 func newLinesIndices(in []byte) []uint32 {
+	off := uint32(0)
 	out := make([]uint32, 0, len(in)/30)
-	for i, c := range in {
-		if c == '\n' {
-			out = append(out, uint32(i))
+	for len(in) > 0 {
+		i := bytes.IndexByte(in, '\n')
+		if i < 0 {
+			return out
 		}
+
+		off += uint32(i)
+		out = append(out, off)
+
+		in = in[i+1:]
+		off++
 	}
 	return out
 }

From fd5fdea2d18461df5bc987f1d1306149328dd2b4 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith
Date: Mon, 6 Nov 2023 11:49:19 +0200
Subject: [PATCH 2/2] build: reuse nls slice between calls to tagsToSections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the profiler I noticed a non-trivial chunk of time spent in the
garbage collector. The slice built by newLinesIndices is allocated and
thrown away on every call to tagsToSections, which means we can reuse
it. This commit does so by introducing a struct that stores the buffer;
we now use one such buffer per shard of symbols we analyse.
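As an aside, this is the standard "owner struct holds a scratch slice,
each call truncates it with [:0]" idiom. A minimal standalone sketch of
the idea (hypothetical names, not the zoekt code itself):

    package main

    // Sketch only: lineScanner is an invented analogue of the
    // tagsToSections struct introduced in the diff below.
    import (
    	"bytes"
    	"fmt"
    )

    // lineScanner owns a scratch buffer that survives between calls, so
    // scanning many documents costs one allocation instead of one per
    // document.
    type lineScanner struct {
    	buf []uint32
    }

    // newlines returns the offset of every '\n' in content. The returned
    // slice aliases the scanner's buffer and is only valid until the next
    // call, which is also why a scanner must not be shared concurrently.
    func (s *lineScanner) newlines(content []byte) []uint32 {
    	out := s.buf[:0] // keep capacity, drop previous contents
    	off := 0
    	for {
    		i := bytes.IndexByte(content[off:], '\n')
    		if i < 0 {
    			break
    		}
    		off += i
    		out = append(out, uint32(off))
    		off++
    	}
    	s.buf = out // save the (possibly grown) buffer for the next call
    	return out
    }

    func main() {
    	var s lineScanner // e.g. one per shard/worker
    	for _, doc := range []string{"a\nbb\n", "ccc\ndddd"} {
    		fmt.Println(s.newlines([]byte(doc))) // [1 4], then [3]
    	}
    }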
Even with the improvements of the previous commit, this further improves
performance by 13% and more than halves allocated bytes per operation:

$ benchstat after.txt after2.txt

old time/op    new time/op    delta
117µs ± 4%     101µs ± 3%     -13.12%  (p=0.000 n=10+10)

old alloc/op   new alloc/op   delta
79.3kB ± 0%    36.3kB ± 0%    -54.24%  (p=0.000 n=9+10)

old allocs/op  new allocs/op  delta
443 ± 0%       441 ± 0%       -0.45%   (p=0.000 n=10+10)

Test Plan: go test -bench BenchmarkTagsToSections
---
 build/ctags.go      | 41 ++++++++++++++++++++++++++++++++---------
 build/ctags_test.go | 14 ++++++++------
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/build/ctags.go b/build/ctags.go
index ee33f18e1..5d979b35c 100644
--- a/build/ctags.go
+++ b/build/ctags.go
@@ -46,6 +46,8 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language
 	monitor := newMonitor()
 	defer monitor.Stop()
 
+	var tagsToSections tagsToSections
+
 	for _, doc := range todo {
 		if doc.Symbols != nil {
 			continue
@@ -78,7 +80,7 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language
 			continue
 		}
 
-		symOffsets, symMetaData, err := tagsToSections(doc.Content, es)
+		symOffsets, symMetaData, err := tagsToSections.Convert(doc.Content, es)
 		if err != nil {
 			return fmt.Errorf("%s: %v", doc.Name, err)
 		}
@@ -109,11 +111,19 @@ func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int {
 	return i + 1
 }
 
-// tagsToSections converts ctags entries to byte ranges (zoekt.DocumentSection)
-// with corresponding metadata (zoekt.Symbol).
-func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
-	nls := newLinesIndices(content)
-	nls = append(nls, uint32(len(content)))
+// tagsToSections contains buffers to be reused between conversions of byte
+// ranges to metadata. This is done to reduce pressure on the garbage
+// collector.
+type tagsToSections struct {
+	nlsBuf []uint32
+}
+
+// Convert converts ctags entries to byte ranges (zoekt.DocumentSection) with
+// corresponding metadata (zoekt.Symbol).
+//
+// This cannot be called concurrently.
+func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
+	nls := t.newLinesIndices(content)
 
 	symOffsets := make([]zoekt.DocumentSection, 0, len(tags))
 	symMetaData := make([]*zoekt.Symbol, 0, len(tags))
@@ -168,13 +178,22 @@ func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSectio
 	return symOffsets, symMetaData, nil
 }
 
-func newLinesIndices(in []byte) []uint32 {
+// newLinesIndices returns the indices of all '\n' bytes in the document, as
+// well as a final entry for the length of the document.
+func (t *tagsToSections) newLinesIndices(in []byte) []uint32 {
+	// reuse nlsBuf between calls to tagsToSections.Convert
+	out := t.nlsBuf
+	if out == nil {
+		out = make([]uint32, 0, len(in)/30)
+	}
+
+	finalEntry := uint32(len(in))
 	off := uint32(0)
-	out := make([]uint32, 0, len(in)/30)
 	for len(in) > 0 {
 		i := bytes.IndexByte(in, '\n')
 		if i < 0 {
-			return out
+			out = append(out, finalEntry)
+			break
 		}
 
 		off += uint32(i)
@@ -183,6 +202,10 @@ func newLinesIndices(in []byte) []uint32 {
 		in = in[i+1:]
 		off++
 	}
+
+	// save buffer for reuse
+	t.nlsBuf = out[:0]
+
 	return out
 }
diff --git a/build/ctags_test.go b/build/ctags_test.go
index b7b3cfcd2..386d07e75 100644
--- a/build/ctags_test.go
+++ b/build/ctags_test.go
@@ -34,7 +34,7 @@ func TestTagsToSections(t *testing.T) {
 		},
 	}
 
-	secs, _, err := tagsToSections(c, tags)
+	secs, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -59,7 +59,7 @@ func TestTagsToSectionsMultiple(t *testing.T) {
 		},
 	}
 
-	got, _, err := tagsToSections(c, tags)
+	got, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -92,7 +92,7 @@ func TestTagsToSectionsReverse(t *testing.T) {
 		},
 	}
 
-	got, _, err := tagsToSections(c, tags)
+	got, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -118,7 +118,7 @@ func TestTagsToSectionsEOF(t *testing.T) {
 		},
 	}
 
-	secs, _, err := tagsToSections(c, tags)
+	secs, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -242,12 +242,14 @@ func BenchmarkTagsToSections(b *testing.B) {
 		b.Fatal(err)
 	}
 
+	var tagsToSections tagsToSections
+
 	entries, err := parser.Parse("./testdata/large_file.cc", file)
 	if err != nil {
 		b.Fatal(err)
 	}
 
-	secs, _, err := tagsToSections(file, entries)
+	secs, _, err := tagsToSections.Convert(file, entries)
 	if err != nil {
 		b.Fatal(err)
 	}
@@ -260,7 +262,7 @@ func BenchmarkTagsToSections(b *testing.B) {
 	b.ReportAllocs()
 
 	for n := 0; n < b.N; n++ {
-		_, _, err := tagsToSections(file, entries)
+		_, _, err := tagsToSections.Convert(file, entries)
 		if err != nil {
 			b.Fatal(err)
 		}
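
For context, the speedup in PATCH 1/2 comes from replacing a
byte-at-a-time range loop with bytes.IndexByte, which dispatches to an
architecture-specific assembly routine (for example AVX2 on amd64) that
examines many bytes per step. A self-contained sketch contrasting the two
scanning strategies (illustrative only; the function names are invented,
the real code lives in build/ctags.go):

    package main

    // Sketch only: naiveNewlines/fastNewlines are hypothetical names
    // mirroring the before/after shapes of newLinesIndices in the patch.
    import (
    	"bytes"
    	"fmt"
    )

    // naiveNewlines inspects one byte per loop iteration; the range loop
    // cannot take advantage of SIMD instructions.
    func naiveNewlines(in []byte) []uint32 {
    	var out []uint32
    	for i, c := range in {
    		if c == '\n' {
    			out = append(out, uint32(i))
    		}
    	}
    	return out
    }

    // fastNewlines hops from newline to newline. bytes.IndexByte is
    // implemented in assembly on common platforms, so each call scans a
    // long span of bytes far faster than a plain Go loop can.
    func fastNewlines(in []byte) []uint32 {
    	var out []uint32
    	off := uint32(0)
    	for len(in) > 0 {
    		i := bytes.IndexByte(in, '\n')
    		if i < 0 {
    			break
    		}
    		off += uint32(i) // absolute position of this newline
    		out = append(out, off)
    		in = in[i+1:] // continue just past the newline
    		off++
    	}
    	return out
    }

    func main() {
    	doc := []byte("a\nbb\nccc")
    	fmt.Println(naiveNewlines(doc)) // [1 4]
    	fmt.Println(fastNewlines(doc))  // [1 4]
    }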