From 7e44ea78f6446f0d2e92b6f90d1e748ce19e2b47 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith <keegan.csmith@gmail.com>
Date: Fri, 12 Jan 2024 08:43:40 +0200
Subject: [PATCH] all: adjust field order for match structs (#716)

Structs related to matches can occur a lot in memory. As such there is
some value to ensuring the order of the fields is aligned to avoid
unneccessary padding.

The "fieldalignment" tool was used to find these changes.

Test Plan: go test
---
 api.go             | 77 ++++++++++++++++++++++++----------------------
 api_test.go        | 29 +++++++++++++++++
 contentprovider.go |  6 ++--
 hititer.go         |  4 +--
 matchiter.go       | 16 ++++++----
 5 files changed, 84 insertions(+), 48 deletions(-)

diff --git a/api.go b/api.go
index da47cef83..e3f90852f 100644
--- a/api.go
+++ b/api.go
@@ -34,52 +34,53 @@ const interfaceBytes uint64 = 16
 
 // FileMatch contains all the matches within a file.
 type FileMatch struct {
-	// Ranking; the higher, the better.
-	Score float64 // TODO - hide this field?
-
-	// For debugging. Needs DebugScore set, but public so tests in
-	// other packages can print some diagnostics.
-	Debug string
-
 	FileName string
 
 	// Repository is the globally unique name of the repo of the
 	// match
 	Repository string
-	Branches   []string
+
+	// SubRepositoryName is the globally unique name of the repo,
+	// if it came from a subrepository
+	SubRepositoryName string
+
+	// SubRepositoryPath holds the prefix where the subrepository
+	// was mounted.
+	SubRepositoryPath string
+
+	// Commit SHA1 (hex) of the (sub)repo holding the file.
+	Version string
+
+	// Detected language of the result.
+	Language string
+
+	// For debugging. Needs DebugScore set, but public so tests in
+	// other packages can print some diagnostics.
+	Debug string
+
+	Branches []string
 
 	// One of LineMatches or ChunkMatches will be returned depending on whether
 	// the SearchOptions.ChunkMatches is set.
 	LineMatches  []LineMatch
 	ChunkMatches []ChunkMatch
 
-	// RepositoryID is a Sourcegraph extension. This is the ID of Repository in
-	// Sourcegraph.
-	RepositoryID uint32
-
-	// RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
-	// order results from different repositories relative to each other.
-	RepositoryPriority float64
-
 	// Only set if requested
 	Content []byte
 
 	// Checksum of the content.
 	Checksum []byte
 
-	// Detected language of the result.
-	Language string
-
-	// SubRepositoryName is the globally unique name of the repo,
-	// if it came from a subrepository
-	SubRepositoryName string
+	// Ranking; the higher, the better.
+	Score float64 // TODO - hide this field?
 
-	// SubRepositoryPath holds the prefix where the subrepository
-	// was mounted.
-	SubRepositoryPath string
+	// RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
+	// order results from different repositories relative to each other.
+	RepositoryPriority float64
 
-	// Commit SHA1 (hex) of the (sub)repo holding the file.
-	Version string
+	// RepositoryID is a Sourcegraph extension. This is the ID of Repository in
+	// Sourcegraph.
+	RepositoryID uint32
 }
 
 func (m *FileMatch) sizeBytes() (sz uint64) {
@@ -134,16 +135,10 @@ func (m *FileMatch) sizeBytes() (sz uint64) {
 // ChunkMatch is a set of non-overlapping matches within a contiguous range of
 // lines in the file.
 type ChunkMatch struct {
+	DebugScore string
+
 	// Content is a contiguous range of complete lines that fully contains Ranges.
 	Content []byte
-	// ContentStart is the location (inclusive) of the beginning of content
-	// relative to the beginning of the file. It will always be at the
-	// beginning of a line (Column will always be 1).
-	ContentStart Location
-
-	// FileName indicates whether this match is a match on the file name, in
-	// which case Content will contain the file name.
-	FileName bool
 
 	// Ranges is a set of matching ranges within this chunk. Each range is relative
 	// to the beginning of the file (not the beginning of Content).
@@ -153,8 +148,16 @@ type ChunkMatch struct {
 	// its length will equal that of Ranges. Any of its elements may be nil.
 	SymbolInfo []*Symbol
 
-	Score      float64
-	DebugScore string
+	// FileName indicates whether this match is a match on the file name, in
+	// which case Content will contain the file name.
+	FileName bool
+
+	// ContentStart is the location (inclusive) of the beginning of content
+	// relative to the beginning of the file. It will always be at the
+	// beginning of a line (Column will always be 1).
+	ContentStart Location
+
+	Score float64
 }
 
 func (cm *ChunkMatch) sizeBytes() (sz uint64) {
diff --git a/api_test.go b/api_test.go
index 8d26251d8..fe860d58c 100644
--- a/api_test.go
+++ b/api_test.go
@@ -17,6 +17,7 @@ package zoekt // import "github.com/sourcegraph/zoekt"
 import (
 	"bytes"
 	"encoding/gob"
+	"reflect"
 	"strings"
 	"testing"
 	"time"
@@ -136,3 +137,31 @@ func TestSizeBytesChunkMatches(t *testing.T) {
 		t.Fatalf("want %d, got %d", wantBytes, cm.sizeBytes())
 	}
 }
+
+func TestMatchSize(t *testing.T) {
+	cases := []struct {
+		v    any
+		size int
+	}{{
+		v:    FileMatch{},
+		size: 256,
+	}, {
+		v:    ChunkMatch{},
+		size: 112,
+	}, {
+		v:    candidateMatch{},
+		size: 72,
+	}, {
+		v:    candidateChunk{},
+		size: 40,
+	}}
+	for _, c := range cases {
+		got := reflect.TypeOf(c.v).Size()
+		if int(got) != c.size {
+			t.Errorf(`sizeof struct %T has changed from %d to %d.
+These are match structs that occur a lot in memory, so we optimize size.
+When changing, please ensure there isn't unnecessary padding via the
+tool fieldalignment then update this test.`, c.v, c.size, got)
+		}
+	}
+}
diff --git a/contentprovider.go b/contentprovider.go
index 68c41cfe1..89ddebbb5 100644
--- a/contentprovider.go
+++ b/contentprovider.go
@@ -369,11 +369,11 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
 }
 
 type candidateChunk struct {
+	candidates []*candidateMatch
 	firstLine  uint32 // 1-based, inclusive
 	lastLine   uint32 // 1-based, inclusive
 	minOffset  uint32 // 0-based, inclusive
 	maxOffset  uint32 // 0-based, exclusive
-	candidates []*candidateMatch
 }
 
 // chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent
@@ -566,8 +566,8 @@ func findSection(secs []DocumentSection, off, sz uint32) (int, bool) {
 
 func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) {
 	type debugScore struct {
-		score float64
 		what  string
+		score float64
 	}
 
 	score := &debugScore{}
@@ -654,8 +654,8 @@ func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch,
 
 func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) {
 	type debugScore struct {
-		score float64
 		what  string
+		score float64
 	}
 
 	score := &debugScore{}
diff --git a/hititer.go b/hititer.go
index 89204e6d6..01a58d1e1 100644
--- a/hititer.go
+++ b/hititer.go
@@ -35,10 +35,10 @@ type hitIterator interface {
 
 // distanceHitIterator looks for hits at a fixed distance apart.
 type distanceHitIterator struct {
-	started  bool
-	distance uint32
 	i1       hitIterator
 	i2       hitIterator
+	distance uint32
+	started  bool
 }
 
 func (i *distanceHitIterator) String() string {
diff --git a/matchiter.go b/matchiter.go
index e7d3f39a2..68c6e4856 100644
--- a/matchiter.go
+++ b/matchiter.go
@@ -20,21 +20,25 @@ import (
 )
 
 // candidateMatch is a candidate match for a substring.
+//
+// Note: a lot of these can be in memory, so think about fieldalignment when
+// modify the fields of this structure.
 type candidateMatch struct {
-	caseSensitive bool
-	fileName      bool
-	symbol        bool
-	symbolIdx     uint32
-
 	substrBytes   []byte
 	substrLowered []byte
 
-	file uint32
+	file      uint32
+	symbolIdx uint32
 
 	// Offsets are relative to the start of the filename or file contents.
 	runeOffset  uint32
 	byteOffset  uint32
 	byteMatchSz uint32
+
+	// bools at end for struct field alignment
+	caseSensitive bool
+	fileName      bool
+	symbol        bool
 }
 
 // Matches content against the substring, and populates byteMatchSz on success