From 1bbd4f83d5ce2ad37b484d33fbdcd933d40481d4 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith
Date: Mon, 6 Nov 2023 11:26:30 +0200
Subject: [PATCH 1/2] build: use bytes.IndexByte for faster newLinesIndices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On my machine this reduces the wall clock time of BenchmarkTagsToSections
by 38%. This is faster because bytes.IndexByte relies on CPU-specific
optimizations to find the next newline (e.g. it uses AVX2 if available).

goos: linux
goarch: amd64
pkg: github.com/sourcegraph/zoekt/build
cpu: AMD Ryzen 9 5950X 16-Core Processor

old time/op    new time/op    delta
188µs ± 7%     117µs ± 4%     -37.96%  (p=0.000 n=10+10)

old alloc/op   new alloc/op   delta
79.3kB ± 0%    79.3kB ± 0%    ~        (all equal)

old allocs/op  new allocs/op  delta
443 ± 0%       443 ± 0%       ~        (all equal)

Test Plan: go test -bench BenchmarkTagsToSections
---
 build/ctags.go | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/build/ctags.go b/build/ctags.go
index ae94d6278..ee33f18e1 100644
--- a/build/ctags.go
+++ b/build/ctags.go
@@ -169,11 +169,19 @@ func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSectio
 }
 
 func newLinesIndices(in []byte) []uint32 {
+	off := uint32(0)
 	out := make([]uint32, 0, len(in)/30)
-	for i, c := range in {
-		if c == '\n' {
-			out = append(out, uint32(i))
+	for len(in) > 0 {
+		i := bytes.IndexByte(in, '\n')
+		if i < 0 {
+			return out
 		}
+
+		off += uint32(i)
+		out = append(out, off)
+
+		in = in[i+1:]
+		off++
 	}
 	return out
 }

From fd5fdea2d18461df5bc987f1d1306149328dd2b4 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith
Date: Mon, 6 Nov 2023 11:49:19 +0200
Subject: [PATCH 2/2] build: reuse nls slice between calls to tagsToSections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the profiler I noticed a non-trivial chunk of time spent in the
garbage collector. The slice built by newLinesIndices is allocated and
thrown away on every call to tagsToSections, which means we can reuse
it. This commit does so by introducing a struct that stores the buffer;
we now use one such buffer per shard of symbols we analyse.
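As an aside, this is the standard "owner struct holds a scratch slice,
each call truncates it with [:0]" idiom. A minimal standalone sketch of
the idea (hypothetical names, not the zoekt code itself):

    package main

    // Sketch only: lineScanner is an invented analogue of the
    // tagsToSections struct introduced in the diff below.
    import (
    	"bytes"
    	"fmt"
    )

    // lineScanner owns a scratch buffer that survives between calls, so
    // scanning many documents costs one allocation instead of one per
    // document.
    type lineScanner struct {
    	buf []uint32
    }

    // newlines returns the offset of every '\n' in content. The returned
    // slice aliases the scanner's buffer and is only valid until the next
    // call, which is also why a scanner must not be shared concurrently.
    func (s *lineScanner) newlines(content []byte) []uint32 {
    	out := s.buf[:0] // keep capacity, drop previous contents
    	off := 0
    	for {
    		i := bytes.IndexByte(content[off:], '\n')
    		if i < 0 {
    			break
    		}
    		off += i
    		out = append(out, uint32(off))
    		off++
    	}
    	s.buf = out // save the (possibly grown) buffer for the next call
    	return out
    }

    func main() {
    	var s lineScanner // e.g. one per shard/worker
    	for _, doc := range []string{"a\nbb\n", "ccc\ndddd"} {
    		fmt.Println(s.newlines([]byte(doc))) // [1 4], then [3]
    	}
    }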
Even with the improvements of the previous commit, this further improves
performance by 13% and more than halves allocated bytes per operation:

$ benchstat after.txt after2.txt

old time/op    new time/op    delta
117µs ± 4%     101µs ± 3%     -13.12%  (p=0.000 n=10+10)

old alloc/op   new alloc/op   delta
79.3kB ± 0%    36.3kB ± 0%    -54.24%  (p=0.000 n=9+10)

old allocs/op  new allocs/op  delta
443 ± 0%       441 ± 0%       -0.45%   (p=0.000 n=10+10)

Test Plan: go test -bench BenchmarkTagsToSections
---
 build/ctags.go      | 41 ++++++++++++++++++++++++++++++++---------
 build/ctags_test.go | 14 ++++++++------
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/build/ctags.go b/build/ctags.go
index ee33f18e1..5d979b35c 100644
--- a/build/ctags.go
+++ b/build/ctags.go
@@ -46,6 +46,8 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language
 	monitor := newMonitor()
 	defer monitor.Stop()
 
+	var tagsToSections tagsToSections
+
 	for _, doc := range todo {
 		if doc.Symbols != nil {
 			continue
@@ -78,7 +80,7 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language
 			continue
 		}
 
-		symOffsets, symMetaData, err := tagsToSections(doc.Content, es)
+		symOffsets, symMetaData, err := tagsToSections.Convert(doc.Content, es)
 		if err != nil {
 			return fmt.Errorf("%s: %v", doc.Name, err)
 		}
@@ -109,11 +111,19 @@ func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int {
 	return i + 1
 }
 
-// tagsToSections converts ctags entries to byte ranges (zoekt.DocumentSection)
-// with corresponding metadata (zoekt.Symbol).
-func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
-	nls := newLinesIndices(content)
-	nls = append(nls, uint32(len(content)))
+// tagsToSections contains buffers to be reused between conversions of byte
+// ranges to metadata. This is done to reduce pressure on the garbage
+// collector.
+type tagsToSections struct {
+	nlsBuf []uint32
+}
+
+// Convert converts ctags entries to byte ranges (zoekt.DocumentSection) with
+// corresponding metadata (zoekt.Symbol).
+//
+// This cannot be called concurrently.
+func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
+	nls := t.newLinesIndices(content)
 
 	symOffsets := make([]zoekt.DocumentSection, 0, len(tags))
 	symMetaData := make([]*zoekt.Symbol, 0, len(tags))
@@ -168,13 +178,22 @@ func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSectio
 	return symOffsets, symMetaData, nil
 }
 
-func newLinesIndices(in []byte) []uint32 {
+// newLinesIndices returns the indices of all '\n' bytes in the document, as
+// well as a final entry for the length of the document.
+func (t *tagsToSections) newLinesIndices(in []byte) []uint32 {
+	// reuse nlsBuf between calls to tagsToSections.Convert
+	out := t.nlsBuf
+	if out == nil {
+		out = make([]uint32, 0, len(in)/30)
+	}
+
+	finalEntry := uint32(len(in))
 	off := uint32(0)
-	out := make([]uint32, 0, len(in)/30)
 	for len(in) > 0 {
 		i := bytes.IndexByte(in, '\n')
 		if i < 0 {
-			return out
+			out = append(out, finalEntry)
+			break
 		}
 
 		off += uint32(i)
@@ -183,6 +202,10 @@ func newLinesIndices(in []byte) []uint32 {
 		in = in[i+1:]
 		off++
 	}
+
+	// save buffer for reuse
+	t.nlsBuf = out[:0]
+
 	return out
 }
diff --git a/build/ctags_test.go b/build/ctags_test.go
index b7b3cfcd2..386d07e75 100644
--- a/build/ctags_test.go
+++ b/build/ctags_test.go
@@ -34,7 +34,7 @@ func TestTagsToSections(t *testing.T) {
 		},
 	}
 
-	secs, _, err := tagsToSections(c, tags)
+	secs, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -59,7 +59,7 @@ func TestTagsToSectionsMultiple(t *testing.T) {
 		},
 	}
 
-	got, _, err := tagsToSections(c, tags)
+	got, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -92,7 +92,7 @@ func TestTagsToSectionsReverse(t *testing.T) {
 		},
 	}
 
-	got, _, err := tagsToSections(c, tags)
+	got, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -118,7 +118,7 @@ func TestTagsToSectionsEOF(t *testing.T) {
 		},
 	}
 
-	secs, _, err := tagsToSections(c, tags)
+	secs, _, err := (&tagsToSections{}).Convert(c, tags)
 	if err != nil {
 		t.Fatal("tagsToSections", err)
 	}
@@ -242,12 +242,14 @@ func BenchmarkTagsToSections(b *testing.B) {
 		b.Fatal(err)
 	}
 
+	var tagsToSections tagsToSections
+
 	entries, err := parser.Parse("./testdata/large_file.cc", file)
 	if err != nil {
 		b.Fatal(err)
 	}
 
-	secs, _, err := tagsToSections(file, entries)
+	secs, _, err := tagsToSections.Convert(file, entries)
 	if err != nil {
 		b.Fatal(err)
 	}
@@ -260,7 +262,7 @@ func BenchmarkTagsToSections(b *testing.B) {
 	b.ReportAllocs()
 
 	for n := 0; n < b.N; n++ {
-		_, _, err := tagsToSections(file, entries)
+		_, _, err := tagsToSections.Convert(file, entries)
 		if err != nil {
 			b.Fatal(err)
 		}
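
For context, the speedup in PATCH 1/2 comes from replacing a
byte-at-a-time range loop with bytes.IndexByte, which dispatches to an
architecture-specific assembly routine (for example AVX2 on amd64) that
examines many bytes per step. A self-contained sketch contrasting the two
scanning strategies (illustrative only; the function names are invented,
the real code lives in build/ctags.go):

    package main

    // Sketch only: naiveNewlines/fastNewlines are hypothetical names
    // mirroring the before/after shapes of newLinesIndices in the patch.
    import (
    	"bytes"
    	"fmt"
    )

    // naiveNewlines inspects one byte per loop iteration; the range loop
    // cannot take advantage of SIMD instructions.
    func naiveNewlines(in []byte) []uint32 {
    	var out []uint32
    	for i, c := range in {
    		if c == '\n' {
    			out = append(out, uint32(i))
    		}
    	}
    	return out
    }

    // fastNewlines hops from newline to newline. bytes.IndexByte is
    // implemented in assembly on common platforms, so each call scans a
    // long span of bytes far faster than a plain Go loop can.
    func fastNewlines(in []byte) []uint32 {
    	var out []uint32
    	off := uint32(0)
    	for len(in) > 0 {
    		i := bytes.IndexByte(in, '\n')
    		if i < 0 {
    			break
    		}
    		off += uint32(i) // absolute position of this newline
    		out = append(out, off)
    		in = in[i+1:] // continue just past the newline
    		off++
    	}
    	return out
    }

    func main() {
    	doc := []byte("a\nbb\nccc")
    	fmt.Println(naiveNewlines(doc)) // [1 4]
    	fmt.Println(fastNewlines(doc))  // [1 4]
    }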