diff --git a/api.go b/api.go index da47cef83..e3f90852f 100644 --- a/api.go +++ b/api.go @@ -34,52 +34,53 @@ const interfaceBytes uint64 = 16 // FileMatch contains all the matches within a file. type FileMatch struct { - // Ranking; the higher, the better. - Score float64 // TODO - hide this field? - - // For debugging. Needs DebugScore set, but public so tests in - // other packages can print some diagnostics. - Debug string - FileName string // Repository is the globally unique name of the repo of the // match Repository string - Branches []string + + // SubRepositoryName is the globally unique name of the repo, + // if it came from a subrepository + SubRepositoryName string + + // SubRepositoryPath holds the prefix where the subrepository + // was mounted. + SubRepositoryPath string + + // Commit SHA1 (hex) of the (sub)repo holding the file. + Version string + + // Detected language of the result. + Language string + + // For debugging. Needs DebugScore set, but public so tests in + // other packages can print some diagnostics. + Debug string + + Branches []string // One of LineMatches or ChunkMatches will be returned depending on whether // the SearchOptions.ChunkMatches is set. LineMatches []LineMatch ChunkMatches []ChunkMatch - // RepositoryID is a Sourcegraph extension. This is the ID of Repository in - // Sourcegraph. - RepositoryID uint32 - - // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to - // order results from different repositories relative to each other. - RepositoryPriority float64 - // Only set if requested Content []byte // Checksum of the content. Checksum []byte - // Detected language of the result. - Language string - - // SubRepositoryName is the globally unique name of the repo, - // if it came from a subrepository - SubRepositoryName string + // Ranking; the higher, the better. + Score float64 // TODO - hide this field? - // SubRepositoryPath holds the prefix where the subrepository - // was mounted. 
- SubRepositoryPath string + // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to + // order results from different repositories relative to each other. + RepositoryPriority float64 - // Commit SHA1 (hex) of the (sub)repo holding the file. - Version string + // RepositoryID is a Sourcegraph extension. This is the ID of Repository in + // Sourcegraph. + RepositoryID uint32 } func (m *FileMatch) sizeBytes() (sz uint64) { @@ -134,16 +135,10 @@ func (m *FileMatch) sizeBytes() (sz uint64) { // ChunkMatch is a set of non-overlapping matches within a contiguous range of // lines in the file. type ChunkMatch struct { + DebugScore string + // Content is a contiguous range of complete lines that fully contains Ranges. Content []byte - // ContentStart is the location (inclusive) of the beginning of content - // relative to the beginning of the file. It will always be at the - // beginning of a line (Column will always be 1). - ContentStart Location - - // FileName indicates whether this match is a match on the file name, in - // which case Content will contain the file name. - FileName bool // Ranges is a set of matching ranges within this chunk. Each range is relative // to the beginning of the file (not the beginning of Content). @@ -153,8 +148,16 @@ type ChunkMatch struct { // its length will equal that of Ranges. Any of its elements may be nil. SymbolInfo []*Symbol - Score float64 - DebugScore string + // FileName indicates whether this match is a match on the file name, in + // which case Content will contain the file name. + FileName bool + + // ContentStart is the location (inclusive) of the beginning of content + // relative to the beginning of the file. It will always be at the + // beginning of a line (Column will always be 1). 
+ ContentStart Location + + Score float64 } func (cm *ChunkMatch) sizeBytes() (sz uint64) { diff --git a/api_test.go b/api_test.go index 8d26251d8..fe860d58c 100644 --- a/api_test.go +++ b/api_test.go @@ -17,6 +17,7 @@ package zoekt // import "github.com/sourcegraph/zoekt" import ( "bytes" "encoding/gob" + "reflect" "strings" "testing" "time" @@ -136,3 +137,31 @@ func TestSizeBytesChunkMatches(t *testing.T) { t.Fatalf("want %d, got %d", wantBytes, cm.sizeBytes()) } } + +func TestMatchSize(t *testing.T) { + cases := []struct { + v any + size int + }{{ + v: FileMatch{}, + size: 256, + }, { + v: ChunkMatch{}, + size: 112, + }, { + v: candidateMatch{}, + size: 72, + }, { + v: candidateChunk{}, + size: 40, + }} + for _, c := range cases { + got := reflect.TypeOf(c.v).Size() + if int(got) != c.size { + t.Errorf(`sizeof struct %T has changed from %d to %d. +These are match structs that occur a lot in memory, so we optimize size. +When changing, please ensure there isn't unnecessary padding via the +tool fieldalignment then update this test.`, c.v, c.size, got) + } + } +} diff --git a/contentprovider.go b/contentprovider.go index 68c41cfe1..89ddebbb5 100644 --- a/contentprovider.go +++ b/contentprovider.go @@ -369,11 +369,11 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte } type candidateChunk struct { + candidates []*candidateMatch firstLine uint32 // 1-based, inclusive lastLine uint32 // 1-based, inclusive minOffset uint32 // 0-based, inclusive maxOffset uint32 // 0-based, exclusive - candidates []*candidateMatch } // chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. 
Adjacent @@ -566,8 +566,8 @@ func findSection(secs []DocumentSection, off, sz uint32) (int, bool) { func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) { type debugScore struct { - score float64 what string + score float64 } score := &debugScore{} @@ -654,8 +654,8 @@ func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) { type debugScore struct { - score float64 what string + score float64 } score := &debugScore{} diff --git a/hititer.go b/hititer.go index 89204e6d6..01a58d1e1 100644 --- a/hititer.go +++ b/hititer.go @@ -35,10 +35,10 @@ type hitIterator interface { // distanceHitIterator looks for hits at a fixed distance apart. type distanceHitIterator struct { - started bool - distance uint32 i1 hitIterator i2 hitIterator + distance uint32 + started bool } func (i *distanceHitIterator) String() string { diff --git a/matchiter.go b/matchiter.go index e7d3f39a2..68c6e4856 100644 --- a/matchiter.go +++ b/matchiter.go @@ -20,21 +20,25 @@ import ( ) // candidateMatch is a candidate match for a substring. +// +// Note: a lot of these can be in memory, so think about fieldalignment when +// modifying the fields of this structure. type candidateMatch struct { - caseSensitive bool - fileName bool - symbol bool - symbolIdx uint32 - substrBytes []byte substrLowered []byte - file uint32 + file uint32 + symbolIdx uint32 // Offsets are relative to the start of the filename or file contents. runeOffset uint32 byteOffset uint32 byteMatchSz uint32 + + // bools at end for struct field alignment + caseSensitive bool + fileName bool + symbol bool } // Matches content against the substring, and populates byteMatchSz on success