all: adjust field order for match structs (#716)

Structs related to matches can occur a lot in memory. As such there is some value to ensuring the order of the fields is aligned to avoid unneccessary padding. The "fieldalignment" tool was used to find these changes. Test Plan: go test
sourcegraph · Jan 12, 2024 · 7e44ea7 · 7e44ea7
1 parent 6463096
commit 7e44ea7
Show file tree

Hide file tree

Showing 5 changed files with 84 additions and 48 deletions.
diff --git a/api.go b/api.go
@@ -34,52 +34,53 @@ const interfaceBytes uint64 = 16
 
 // FileMatch contains all the matches within a file.
 type FileMatch struct {
-	// Ranking; the higher, the better.
-	Score float64 // TODO - hide this field?
-
-	// For debugging. Needs DebugScore set, but public so tests in
-	// other packages can print some diagnostics.
-	Debug string
-
 	FileName string
 
 	// Repository is the globally unique name of the repo of the
 	// match
 	Repository string
-	Branches   []string
+
+	// SubRepositoryName is the globally unique name of the repo,
+	// if it came from a subrepository
+	SubRepositoryName string
+
+	// SubRepositoryPath holds the prefix where the subrepository
+	// was mounted.
+	SubRepositoryPath string
+
+	// Commit SHA1 (hex) of the (sub)repo holding the file.
+	Version string
+
+	// Detected language of the result.
+	Language string
+
+	// For debugging. Needs DebugScore set, but public so tests in
+	// other packages can print some diagnostics.
+	Debug string
+
+	Branches []string
 
 	// One of LineMatches or ChunkMatches will be returned depending on whether
 	// the SearchOptions.ChunkMatches is set.
 	LineMatches  []LineMatch
 	ChunkMatches []ChunkMatch
 
-	// RepositoryID is a Sourcegraph extension. This is the ID of Repository in
-	// Sourcegraph.
-	RepositoryID uint32
-
-	// RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
-	// order results from different repositories relative to each other.
-	RepositoryPriority float64
-
 	// Only set if requested
 	Content []byte
 
 	// Checksum of the content.
 	Checksum []byte
 
-	// Detected language of the result.
-	Language string
-
-	// SubRepositoryName is the globally unique name of the repo,
-	// if it came from a subrepository
-	SubRepositoryName string
+	// Ranking; the higher, the better.
+	Score float64 // TODO - hide this field?
 
-	// SubRepositoryPath holds the prefix where the subrepository
-	// was mounted.
-	SubRepositoryPath string
+	// RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
+	// order results from different repositories relative to each other.
+	RepositoryPriority float64
 
-	// Commit SHA1 (hex) of the (sub)repo holding the file.
-	Version string
+	// RepositoryID is a Sourcegraph extension. This is the ID of Repository in
+	// Sourcegraph.
+	RepositoryID uint32
 }
 
 func (m *FileMatch) sizeBytes() (sz uint64) {
@@ -134,16 +135,10 @@ func (m *FileMatch) sizeBytes() (sz uint64) {
 // ChunkMatch is a set of non-overlapping matches within a contiguous range of
 // lines in the file.
 type ChunkMatch struct {
+	DebugScore string
+
 	// Content is a contiguous range of complete lines that fully contains Ranges.
 	Content []byte
-	// ContentStart is the location (inclusive) of the beginning of content
-	// relative to the beginning of the file. It will always be at the
-	// beginning of a line (Column will always be 1).
-	ContentStart Location
-
-	// FileName indicates whether this match is a match on the file name, in
-	// which case Content will contain the file name.
-	FileName bool
 
 	// Ranges is a set of matching ranges within this chunk. Each range is relative
 	// to the beginning of the file (not the beginning of Content).
@@ -153,8 +148,16 @@ type ChunkMatch struct {
 	// its length will equal that of Ranges. Any of its elements may be nil.
 	SymbolInfo []*Symbol
 
-	Score      float64
-	DebugScore string
+	// FileName indicates whether this match is a match on the file name, in
+	// which case Content will contain the file name.
+	FileName bool
+
+	// ContentStart is the location (inclusive) of the beginning of content
+	// relative to the beginning of the file. It will always be at the
+	// beginning of a line (Column will always be 1).
+	ContentStart Location
+
+	Score float64
 }
 
 func (cm *ChunkMatch) sizeBytes() (sz uint64) {

diff --git a/api_test.go b/api_test.go
@@ -17,6 +17,7 @@ package zoekt // import "github.com/sourcegraph/zoekt"
 import (
 	"bytes"
 	"encoding/gob"
+	"reflect"
 	"strings"
 	"testing"
 	"time"
@@ -136,3 +137,31 @@ func TestSizeBytesChunkMatches(t *testing.T) {
 		t.Fatalf("want %d, got %d", wantBytes, cm.sizeBytes())
 	}
 }
+
+func TestMatchSize(t *testing.T) {
+	cases := []struct {
+		v    any
+		size int
+	}{{
+		v:    FileMatch{},
+		size: 256,
+	}, {
+		v:    ChunkMatch{},
+		size: 112,
+	}, {
+		v:    candidateMatch{},
+		size: 72,
+	}, {
+		v:    candidateChunk{},
+		size: 40,
+	}}
+	for _, c := range cases {
+		got := reflect.TypeOf(c.v).Size()
+		if int(got) != c.size {
+			t.Errorf(`sizeof struct %T has changed from %d to %d.
+These are match structs that occur a lot in memory, so we optimize size.
+When changing, please ensure there isn't unnecessary padding via the
+tool fieldalignment then update this test.`, c.v, c.size, got)
+		}
+	}
+}
diff --git a/contentprovider.go b/contentprovider.go
@@ -369,11 +369,11 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
 }
 
 type candidateChunk struct {
+	candidates []*candidateMatch
 	firstLine  uint32 // 1-based, inclusive
 	lastLine   uint32 // 1-based, inclusive
 	minOffset  uint32 // 0-based, inclusive
 	maxOffset  uint32 // 0-based, exclusive
-	candidates []*candidateMatch
 }
 
 // chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent
@@ -566,8 +566,8 @@ func findSection(secs []DocumentSection, off, sz uint32) (int, bool) {
 
 func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) {
 	type debugScore struct {
-		score float64
 		what  string
+		score float64
 	}
 
 	score := &debugScore{}
@@ -654,8 +654,8 @@ func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch,
 
 func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) {
 	type debugScore struct {
-		score float64
 		what  string
+		score float64
 	}
 
 	score := &debugScore{}

diff --git a/hititer.go b/hititer.go
@@ -35,10 +35,10 @@ type hitIterator interface {
 
 // distanceHitIterator looks for hits at a fixed distance apart.
 type distanceHitIterator struct {
-	started  bool
-	distance uint32
 	i1       hitIterator
 	i2       hitIterator
+	distance uint32
+	started  bool
 }
 
 func (i *distanceHitIterator) String() string {

diff --git a/matchiter.go b/matchiter.go
@@ -20,21 +20,25 @@ import (
 )
 
 // candidateMatch is a candidate match for a substring.
+//
+// Note: a lot of these can be in memory, so think about fieldalignment when
+// modify the fields of this structure.
 type candidateMatch struct {
-	caseSensitive bool
-	fileName      bool
-	symbol        bool
-	symbolIdx     uint32
-
 	substrBytes   []byte
 	substrLowered []byte
 
-	file uint32
+	file      uint32
+	symbolIdx uint32
 
 	// Offsets are relative to the start of the filename or file contents.
 	runeOffset  uint32
 	byteOffset  uint32
 	byteMatchSz uint32
+
+	// bools at end for struct field alignment
+	caseSensitive bool
+	fileName      bool
+	symbol        bool
 }
 
 // Matches content against the substring, and populates byteMatchSz on success