From 7e44ea78f6446f0d2e92b6f90d1e748ce19e2b47 Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Fri, 12 Jan 2024 08:43:40 +0200 Subject: [PATCH] all: adjust field order for match structs (#716) Structs related to matches can occur a lot in memory. As such there is some value to ensuring the order of the fields is aligned to avoid unneccessary padding. The "fieldalignment" tool was used to find these changes. Test Plan: go test --- api.go | 77 ++++++++++++++++++++++++---------------------- api_test.go | 29 +++++++++++++++++ contentprovider.go | 6 ++-- hititer.go | 4 +-- matchiter.go | 16 ++++++---- 5 files changed, 84 insertions(+), 48 deletions(-) diff --git a/api.go b/api.go index da47cef83..e3f90852f 100644 --- a/api.go +++ b/api.go @@ -34,52 +34,53 @@ const interfaceBytes uint64 = 16 // FileMatch contains all the matches within a file. type FileMatch struct { - // Ranking; the higher, the better. - Score float64 // TODO - hide this field? - - // For debugging. Needs DebugScore set, but public so tests in - // other packages can print some diagnostics. - Debug string - FileName string // Repository is the globally unique name of the repo of the // match Repository string - Branches []string + + // SubRepositoryName is the globally unique name of the repo, + // if it came from a subrepository + SubRepositoryName string + + // SubRepositoryPath holds the prefix where the subrepository + // was mounted. + SubRepositoryPath string + + // Commit SHA1 (hex) of the (sub)repo holding the file. + Version string + + // Detected language of the result. + Language string + + // For debugging. Needs DebugScore set, but public so tests in + // other packages can print some diagnostics. + Debug string + + Branches []string // One of LineMatches or ChunkMatches will be returned depending on whether // the SearchOptions.ChunkMatches is set. LineMatches []LineMatch ChunkMatches []ChunkMatch - // RepositoryID is a Sourcegraph extension. This is the ID of Repository in - // Sourcegraph. - RepositoryID uint32 - - // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to - // order results from different repositories relative to each other. - RepositoryPriority float64 - // Only set if requested Content []byte // Checksum of the content. Checksum []byte - // Detected language of the result. - Language string - - // SubRepositoryName is the globally unique name of the repo, - // if it came from a subrepository - SubRepositoryName string + // Ranking; the higher, the better. + Score float64 // TODO - hide this field? - // SubRepositoryPath holds the prefix where the subrepository - // was mounted. - SubRepositoryPath string + // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to + // order results from different repositories relative to each other. + RepositoryPriority float64 - // Commit SHA1 (hex) of the (sub)repo holding the file. - Version string + // RepositoryID is a Sourcegraph extension. This is the ID of Repository in + // Sourcegraph. + RepositoryID uint32 } func (m *FileMatch) sizeBytes() (sz uint64) { @@ -134,16 +135,10 @@ func (m *FileMatch) sizeBytes() (sz uint64) { // ChunkMatch is a set of non-overlapping matches within a contiguous range of // lines in the file. type ChunkMatch struct { + DebugScore string + // Content is a contiguous range of complete lines that fully contains Ranges. Content []byte - // ContentStart is the location (inclusive) of the beginning of content - // relative to the beginning of the file. It will always be at the - // beginning of a line (Column will always be 1). - ContentStart Location - - // FileName indicates whether this match is a match on the file name, in - // which case Content will contain the file name. - FileName bool // Ranges is a set of matching ranges within this chunk. Each range is relative // to the beginning of the file (not the beginning of Content). @@ -153,8 +148,16 @@ type ChunkMatch struct { // its length will equal that of Ranges. Any of its elements may be nil. SymbolInfo []*Symbol - Score float64 - DebugScore string + // FileName indicates whether this match is a match on the file name, in + // which case Content will contain the file name. + FileName bool + + // ContentStart is the location (inclusive) of the beginning of content + // relative to the beginning of the file. It will always be at the + // beginning of a line (Column will always be 1). + ContentStart Location + + Score float64 } func (cm *ChunkMatch) sizeBytes() (sz uint64) { diff --git a/api_test.go b/api_test.go index 8d26251d8..fe860d58c 100644 --- a/api_test.go +++ b/api_test.go @@ -17,6 +17,7 @@ package zoekt // import "github.com/sourcegraph/zoekt" import ( "bytes" "encoding/gob" + "reflect" "strings" "testing" "time" @@ -136,3 +137,31 @@ func TestSizeBytesChunkMatches(t *testing.T) { t.Fatalf("want %d, got %d", wantBytes, cm.sizeBytes()) } } + +func TestMatchSize(t *testing.T) { + cases := []struct { + v any + size int + }{{ + v: FileMatch{}, + size: 256, + }, { + v: ChunkMatch{}, + size: 112, + }, { + v: candidateMatch{}, + size: 72, + }, { + v: candidateChunk{}, + size: 40, + }} + for _, c := range cases { + got := reflect.TypeOf(c.v).Size() + if int(got) != c.size { + t.Errorf(`sizeof struct %T has changed from %d to %d. +These are match structs that occur a lot in memory, so we optimize size. +When changing, please ensure there isn't unnecessary padding via the +tool fieldalignment then update this test.`, c.v, c.size, got) + } + } +} diff --git a/contentprovider.go b/contentprovider.go index 68c41cfe1..89ddebbb5 100644 --- a/contentprovider.go +++ b/contentprovider.go @@ -369,11 +369,11 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte } type candidateChunk struct { + candidates []*candidateMatch firstLine uint32 // 1-based, inclusive lastLine uint32 // 1-based, inclusive minOffset uint32 // 0-based, inclusive maxOffset uint32 // 0-based, exclusive - candidates []*candidateMatch } // chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent @@ -566,8 +566,8 @@ func findSection(secs []DocumentSection, off, sz uint32) (int, bool) { func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) { type debugScore struct { - score float64 what string + score float64 } score := &debugScore{} @@ -654,8 +654,8 @@ func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) { type debugScore struct { - score float64 what string + score float64 } score := &debugScore{} diff --git a/hititer.go b/hititer.go index 89204e6d6..01a58d1e1 100644 --- a/hititer.go +++ b/hititer.go @@ -35,10 +35,10 @@ type hitIterator interface { // distanceHitIterator looks for hits at a fixed distance apart. type distanceHitIterator struct { - started bool - distance uint32 i1 hitIterator i2 hitIterator + distance uint32 + started bool } func (i *distanceHitIterator) String() string { diff --git a/matchiter.go b/matchiter.go index e7d3f39a2..68c6e4856 100644 --- a/matchiter.go +++ b/matchiter.go @@ -20,21 +20,25 @@ import ( ) // candidateMatch is a candidate match for a substring. +// +// Note: a lot of these can be in memory, so think about fieldalignment when +// modify the fields of this structure. type candidateMatch struct { - caseSensitive bool - fileName bool - symbol bool - symbolIdx uint32 - substrBytes []byte substrLowered []byte - file uint32 + file uint32 + symbolIdx uint32 // Offsets are relative to the start of the filename or file contents. runeOffset uint32 byteOffset uint32 byteMatchSz uint32 + + // bools at end for struct field alignment + caseSensitive bool + fileName bool + symbol bool } // Matches content against the substring, and populates byteMatchSz on success