Skip to content

Commit

Permalink
contentprovider: cache last newlines lookup
Browse files Browse the repository at this point in the history
I wanted to introduce another call to newlines.atOffset for the purpose
of scoring. I was worried about doubling the calls to this so introduced
a tiny (but hopefully effective) performance optimization.

Essentially we often look up multiple times on the same line, but then
the return of atOffset shouldn't change. So we introduce a tiny cache
for this.

We could do even more and if we are not on the same line, use the last
computed index to restrict which side of the nls.locs we look at.
However, there are no direct benchmarks for this and I haven't seen it
in profiles before. So just doing this minor optimization for fun.

Test Plan: go test
  • Loading branch information
keegancsmith committed Jan 17, 2024
1 parent 1adab43 commit 197d133
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
32 changes: 24 additions & 8 deletions contentprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,11 @@ func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines
}

func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int) []LineMatch {
newlines := p.newlines()
var result []LineMatch
for len(ms) > 0 {
m := ms[0]
num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset)
num, lineStart, lineEnd := newlines.atOffset(m.byteOffset)

var lineCands []*candidateMatch

Expand Down Expand Up @@ -267,8 +268,8 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin
finalMatch.Line = data[lineStart:lineEnd]

if numContextLines > 0 {
finalMatch.Before = p.newlines().getLines(data, num-numContextLines, num)
finalMatch.After = p.newlines().getLines(data, num+1, num+1+numContextLines)
finalMatch.Before = newlines.getLines(data, num-numContextLines, num)
finalMatch.After = newlines.getLines(data, num+1, num+1+numContextLines)
}

for _, m := range lineCands {
Expand Down Expand Up @@ -455,27 +456,42 @@ type newlines struct {
// on this struct so we can safely know the length of the last line
// in the file since not all files end in a newline.
fileSize uint32

// PERF: These store the last computed value for atOffset. This is valuable
// since we often repeatedly look up these values for items on the same
// line.
lastLineNumber int
lastLineStart, lastLineEnd uint32
}

// atOffset returns the line containing the offset. If the offset lands on
// the newline ending line M, we return M. The line is characterized
// by its linenumber (base-1, byte index of line start, byte index of
// line end). The line end is the index of a newline, or the filesize
// (if matching the last line of the file.)
func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) {
func (nls *newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) {
// Use cache if we are on the same line as the last call
if offset >= nls.lastLineStart && offset < nls.lastLineEnd {
return nls.lastLineNumber, int(nls.lastLineStart), int(nls.lastLineEnd)
}

idx := sort.Search(len(nls.locs), func(n int) bool {
return nls.locs[n] >= offset
})

start, end := nls.lineBounds(idx + 1)
return idx + 1, int(start), int(end)
number := idx + 1
start, end := nls.lineBounds(number)

nls.lastLineNumber, nls.lastLineStart, nls.lastLineEnd = number, start, end

return number, int(start), int(end)
}

// lineBounds returns the byte offsets of the start and end of the 1-based
// lineNumber. The end offset is exclusive and will not contain the line-ending
// newline. If the line number is out of range of the lines in the file, start
// and end will be clamped to [0,fileSize].
func (nls newlines) lineBounds(lineNumber int) (start, end uint32) {
func (nls *newlines) lineBounds(lineNumber int) (start, end uint32) {
// nls.locs[0] + 1 is the start of the 2nd line of data.
startIdx := lineNumber - 2
endIdx := lineNumber - 1
Expand All @@ -501,7 +517,7 @@ func (nls newlines) lineBounds(lineNumber int) (start, end uint32) {

// getLines returns a slice of data containing the lines [low, high).
// low is 1-based and inclusive. high is 1-based and exclusive.
func (nls newlines) getLines(data []byte, low, high int) []byte {
func (nls *newlines) getLines(data []byte, low, high int) []byte {
if low >= high {
return nil
}
Expand Down
4 changes: 3 additions & 1 deletion matchtree.go
Original file line number Diff line number Diff line change
Expand Up @@ -642,14 +642,16 @@ func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matc
}
}

newlines := cp.newlines()

type lineRange struct {
start int
end int
}
lines := make([]lineRange, 0, len(t.children[fewestChildren].(*substrMatchTree).current))
prev := -1
for _, candidate := range t.children[fewestChildren].(*substrMatchTree).current {
line, byteStart, byteEnd := cp.newlines().atOffset(candidate.byteOffset)
line, byteStart, byteEnd := newlines.atOffset(candidate.byteOffset)
if line == prev {
continue
}
Expand Down

0 comments on commit 197d133

Please sign in to comment.