Skip to content

Commit

Permalink
all: adjust field order for match structs (#716)
Browse files Browse the repository at this point in the history
Structs related to matches can occur a lot in memory. As such there is
some value to ensuring the order of the fields is aligned to avoid
unnecessary padding.

The "fieldalignment" tool was used to find these changes.

Test Plan: go test
  • Loading branch information
keegancsmith authored Jan 12, 2024
1 parent 6463096 commit 7e44ea7
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 48 deletions.
77 changes: 40 additions & 37 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,52 +34,53 @@ const interfaceBytes uint64 = 16

// FileMatch contains all the matches within a file.
//
// Note: a lot of these can be in memory, so think about fieldalignment when
// modifying the fields of this structure. TestMatchSize checks the resulting
// struct size.
type FileMatch struct {
// Ranking; the higher, the better.
Score float64 // TODO - hide this field?

// For debugging. Needs DebugScore set, but public so tests in
// other packages can print some diagnostics.
Debug string

FileName string

// Repository is the globally unique name of the repo of the
// match
Repository string
Branches []string

// SubRepositoryName is the globally unique name of the repo,
// if it came from a subrepository
SubRepositoryName string

// SubRepositoryPath holds the prefix where the subrepository
// was mounted.
SubRepositoryPath string

// Commit SHA1 (hex) of the (sub)repo holding the file.
Version string

// Detected language of the result.
Language string

// For debugging. Needs DebugScore set, but public so tests in
// other packages can print some diagnostics.
Debug string

Branches []string

// One of LineMatches or ChunkMatches will be returned depending on whether
// the SearchOptions.ChunkMatches is set.
LineMatches []LineMatch
ChunkMatches []ChunkMatch

// RepositoryID is a Sourcegraph extension. This is the ID of Repository in
// Sourcegraph.
RepositoryID uint32

// RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
// order results from different repositories relative to each other.
RepositoryPriority float64

// Only set if requested
Content []byte

// Checksum of the content.
Checksum []byte

// Detected language of the result.
Language string

// SubRepositoryName is the globally unique name of the repo,
// if it came from a subrepository
SubRepositoryName string
// Ranking; the higher, the better.
Score float64 // TODO - hide this field?

// SubRepositoryPath holds the prefix where the subrepository
// was mounted.
SubRepositoryPath string
// RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
// order results from different repositories relative to each other.
RepositoryPriority float64

// Commit SHA1 (hex) of the (sub)repo holding the file.
Version string
// RepositoryID is a Sourcegraph extension. This is the ID of Repository in
// Sourcegraph.
RepositoryID uint32
}

func (m *FileMatch) sizeBytes() (sz uint64) {
Expand Down Expand Up @@ -134,16 +135,10 @@ func (m *FileMatch) sizeBytes() (sz uint64) {
// ChunkMatch is a set of non-overlapping matches within a contiguous range of
// lines in the file.
type ChunkMatch struct {
DebugScore string

// Content is a contiguous range of complete lines that fully contains Ranges.
Content []byte
// ContentStart is the location (inclusive) of the beginning of content
// relative to the beginning of the file. It will always be at the
// beginning of a line (Column will always be 1).
ContentStart Location

// FileName indicates whether this match is a match on the file name, in
// which case Content will contain the file name.
FileName bool

// Ranges is a set of matching ranges within this chunk. Each range is relative
// to the beginning of the file (not the beginning of Content).
Expand All @@ -153,8 +148,16 @@ type ChunkMatch struct {
// its length will equal that of Ranges. Any of its elements may be nil.
SymbolInfo []*Symbol

Score float64
DebugScore string
// FileName indicates whether this match is a match on the file name, in
// which case Content will contain the file name.
FileName bool

// ContentStart is the location (inclusive) of the beginning of content
// relative to the beginning of the file. It will always be at the
// beginning of a line (Column will always be 1).
ContentStart Location

Score float64
}

func (cm *ChunkMatch) sizeBytes() (sz uint64) {
Expand Down
29 changes: 29 additions & 0 deletions api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package zoekt // import "github.com/sourcegraph/zoekt"
import (
"bytes"
"encoding/gob"
"reflect"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -136,3 +137,31 @@ func TestSizeBytesChunkMatches(t *testing.T) {
t.Fatalf("want %d, got %d", wantBytes, cm.sizeBytes())
}
}

// TestMatchSize pins the in-memory size of the match structs so that a change
// which grows them (for example by introducing padding) is noticed.
//
// The expected sizes were measured with 8-byte machine words, so the test is
// skipped on platforms where uintptr has a different width; strings, slices
// and pointers would otherwise make every case fail there.
func TestMatchSize(t *testing.T) {
	if wordSize := reflect.TypeOf(uintptr(0)).Size(); wordSize != 8 {
		t.Skipf("expected sizes assume 8-byte words, platform has %d-byte words", wordSize)
	}

	cases := []struct {
		v    any
		size int
	}{{
		v:    FileMatch{},
		size: 256,
	}, {
		v:    ChunkMatch{},
		size: 112,
	}, {
		v:    candidateMatch{},
		size: 72,
	}, {
		v:    candidateChunk{},
		size: 40,
	}}
	for _, c := range cases {
		// reflect reports the size of the value's type, including padding.
		got := reflect.TypeOf(c.v).Size()
		if int(got) != c.size {
			t.Errorf(`sizeof struct %T has changed from %d to %d.
These are match structs that occur a lot in memory, so we optimize size.
When changing, please ensure there isn't unnecessary padding via the
tool fieldalignment then update this test.`, c.v, c.size, got)
		}
	}
}
6 changes: 3 additions & 3 deletions contentprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,11 +369,11 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
}

// candidateChunk holds a group of candidate matches together with the line
// range and offset range recorded for that group.
//
// Its size is checked by TestMatchSize, so keep the field order free of
// unnecessary padding (see the fieldalignment tool).
type candidateChunk struct {
candidates []*candidateMatch
firstLine uint32 // 1-based, inclusive
lastLine uint32 // 1-based, inclusive
minOffset uint32 // 0-based, inclusive
maxOffset uint32 // 0-based, exclusive
candidates []*candidateMatch
}

// chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent
Expand Down Expand Up @@ -566,8 +566,8 @@ func findSection(secs []DocumentSection, off, sz uint32) (int, bool) {

func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) {
type debugScore struct {
score float64
what string
score float64
}

score := &debugScore{}
Expand Down Expand Up @@ -654,8 +654,8 @@ func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch,

func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) {
type debugScore struct {
score float64
what string
score float64
}

score := &debugScore{}
Expand Down
4 changes: 2 additions & 2 deletions hititer.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ type hitIterator interface {

// distanceHitIterator looks for hits at a fixed distance apart.
//
// Field order matters for struct padding here; check with the fieldalignment
// tool when adding or moving fields.
type distanceHitIterator struct {
started bool
distance uint32
i1 hitIterator
i2 hitIterator
distance uint32
started bool
}

func (i *distanceHitIterator) String() string {
Expand Down
16 changes: 10 additions & 6 deletions matchiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,25 @@ import (
)

// candidateMatch is a candidate match for a substring.
//
// Note: a lot of these can be in memory, so think about fieldalignment when
// modifying the fields of this structure.
type candidateMatch struct {
caseSensitive bool
fileName bool
symbol bool
symbolIdx uint32

substrBytes []byte
substrLowered []byte

file uint32
file uint32
symbolIdx uint32

// Offsets are relative to the start of the filename or file contents.
runeOffset uint32
byteOffset uint32
byteMatchSz uint32

// bools at end for struct field alignment
caseSensitive bool
fileName bool
symbol bool
}

// Matches content against the substring, and populates byteMatchSz on success
Expand Down

0 comments on commit 7e44ea7

Please sign in to comment.