Skip to content

Commit

Permalink
always include newlines as part of line
Browse files Browse the repository at this point in the history
  • Loading branch information
camdencheek committed Apr 10, 2024
1 parent ab1b8f0 commit b1aa903
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 65 deletions.
84 changes: 45 additions & 39 deletions contentprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin
var result []LineMatch
for len(ms) > 0 {
m := ms[0]
num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset)
num := p.newlines().atOffset(m.byteOffset)
lineStart := int(p.newlines().lineStart(num))
lineEnd := int(p.newlines().lineEnd(num, true))

var lineCands []*candidateMatch

Expand Down Expand Up @@ -316,19 +318,24 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
for _, cm := range chunk.candidates {
startOffset := cm.byteOffset
endOffset := cm.byteOffset + cm.byteMatchSz
startLine, startLineOffset, _ := newlines.atOffset(startOffset)
endLine, endLineOffset, _ := newlines.atOffset(endOffset)
startLine := newlines.atOffset(startOffset)
endLine := newlines.atOffset(
// We want the line of the last byte in the match, not the first byte outside of the match.
// For a zero-length match, endOffset-1 could be before match start, so fall back to the
// byte after the match (as we do for startLine), not before.
max(startOffset, endOffset-1),
)

ranges = append(ranges, Range{
Start: Location{
ByteOffset: startOffset,
LineNumber: uint32(startLine),
Column: columnHelper.get(startLineOffset, startOffset),
Column: columnHelper.get(int(newlines.lineStart(startLine)), startOffset),
},
End: Location{
ByteOffset: endOffset,
LineNumber: uint32(endLine),
Column: columnHelper.get(endLineOffset, endOffset),
Column: columnHelper.get(int(newlines.lineStart(endLine)), endOffset),
},
})
}
Expand All @@ -337,7 +344,7 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
if firstLineNumber < 1 {
firstLineNumber = 1
}
firstLineStart, _ := newlines.lineBounds(firstLineNumber)
firstLineStart := newlines.lineStart(firstLineNumber)

chunkMatches = append(chunkMatches, ChunkMatch{
Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1),
Expand Down Expand Up @@ -375,8 +382,8 @@ func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines in
for _, m := range ms {
startOffset := m.byteOffset
endOffset := m.byteOffset + m.byteMatchSz
firstLine, _, _ := newlines.atOffset(startOffset)
lastLine, _, _ := newlines.atOffset(endOffset)
firstLine := newlines.atOffset(startOffset)
lastLine := newlines.atOffset(endOffset - 1)

if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines {
// If a new chunk created with the current candidateMatch would
Expand Down Expand Up @@ -451,41 +458,52 @@ type newlines struct {
// by its linenumber (base-1, byte index of line start, byte index of
// line end). The line end is the index of a newline, or the filesize
// (if matching the last line of the file.)
func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) {
func (nls newlines) atOffset(offset uint32) (lineNumber int) {
idx := sort.Search(len(nls.locs), func(n int) bool {
return nls.locs[n] >= offset
})

start, end := nls.lineBounds(idx + 1)
return idx + 1, int(start), int(end)
// TODO: make sure this makes sense not including the newline
return idx + 1
}

// lineBounds returns the byte offsets of the start and end of the 1-based
// lineNumber. The end offset is exclusive and will not contain the line-ending
// newline. If the line number is out of range of the lines in the file, start
// and end will be clamped to [0,fileSize].
func (nls newlines) lineBounds(lineNumber int) (start, end uint32) {
// lineStart returns the byte offset of the beginning of the given line.
// lineNumber is 1-based. If lineNumber is out of range of the lines in the
// file, the return value will be clamped to [0,fileSize].
func (nls newlines) lineStart(lineNumber int) uint32 {
// nls.locs[0] + 1 is the start of the 2nd line of data.
startIdx := lineNumber - 2
endIdx := lineNumber - 1

if startIdx < 0 {
start = 0
return 0
} else if startIdx >= len(nls.locs) {
start = nls.fileSize
return nls.fileSize
} else {
start = nls.locs[startIdx] + 1
return nls.locs[startIdx] + 1
}
}

// lineEnd returns the (exclusive) byte offset pointing immediately after the
// last byte of the line. lineNumber is 1-based. If lineNumber is out
// of range of the lines in the file, the return value will be clamped to
// [0,fileSize].
//
// If trimNewline is set, the offset exclude any line-terminating newline.
// Note: this does _not_ trim any carriage returns and should probably not be
// used by default. It only exists for backwards compatibility.
func (nls newlines) lineEnd(lineNumber int, trimNewline bool) uint32 {
// nls.locs[0] + 1 is the start of the 2nd line of data.
endIdx := lineNumber - 1

if endIdx < 0 {
end = 0
return 0
} else if endIdx >= len(nls.locs) {
end = nls.fileSize
return nls.fileSize
} else if trimNewline {
return nls.locs[endIdx]
} else {
end = nls.locs[endIdx]
return nls.locs[endIdx] + 1
}

return start, end
}

// getLines returns a slice of data containing the lines [low, high).
Expand All @@ -495,20 +513,8 @@ func (nls newlines) getLines(data []byte, low, high int) []byte {
return nil
}

lowStart, _ := nls.lineBounds(low)
_, highEnd := nls.lineBounds(high - 1)

// Drop any trailing newline. Editors do not treat a trailing newline as
// the start of a new line, so we should not either. lineBounds clamps to
// len(data) when an out-of-bounds line is requested.
//
// As an example, if we request lines 1-5 from a file with contents
// `one\ntwo\nthree\n`, we should return `one\ntwo\nthree` because those are
// the three "lines" in the file, separated by newlines.
if highEnd == uint32(len(data)) && bytes.HasSuffix(data, []byte{'\n'}) {
highEnd = highEnd - 1
lowStart = min(lowStart, highEnd)
}
lowStart := nls.lineStart(low)
highEnd := nls.lineEnd(high-1, false)

return data[lowStart:highEnd]
}
Expand Down
46 changes: 25 additions & 21 deletions contentprovider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ func TestGetLines(t *testing.T) {
for _, content := range contents {
t.Run("", func(t *testing.T) {
newLines := getNewlines(content)
// Trim the last newline before splitting because a trailing newline does not constitute a new line
lines := bytes.Split(bytes.TrimSuffix(content, []byte{'\n'}), []byte{'\n'})
lines := bytes.SplitAfter(content, []byte{'\n'})
if len(lines) > 0 && len(lines[len(lines)-1]) == 0 {
// A trailing newline does not delimit an empty line at the end of a file
lines = lines[:len(lines)-1]
}
wantGetLines := func(low, high int) []byte {
low--
high--
Expand All @@ -51,7 +54,7 @@ func TestGetLines(t *testing.T) {
if high > len(lines) {
high = len(lines)
}
return bytes.Join(lines[low:high], []byte{'\n'})
return bytes.Join(lines[low:high], nil)
}

for low := -1; low <= len(lines)+2; low++ {
Expand All @@ -72,32 +75,32 @@ func TestAtOffset(t *testing.T) {
data []byte
offset uint32
lineNumber int
lineStart int
lineEnd int
lineStart uint32
lineEnd uint32
}{{
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 0,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 6,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 2,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 2,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 7,
lineNumber: 2, lineStart: 7, lineEnd: 14,
lineNumber: 2, lineStart: 7, lineEnd: 15,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 11,
lineNumber: 2, lineStart: 7, lineEnd: 14,
lineNumber: 2, lineStart: 7, lineEnd: 15,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 15,
Expand All @@ -109,11 +112,11 @@ func TestAtOffset(t *testing.T) {
}, {
data: []byte("\n\n"),
offset: 0,
lineNumber: 1, lineStart: 0, lineEnd: 0,
lineNumber: 1, lineStart: 0, lineEnd: 1,
}, {
data: []byte("\n\n"),
offset: 1,
lineNumber: 2, lineStart: 1, lineEnd: 1,
lineNumber: 2, lineStart: 1, lineEnd: 2,
}, {
data: []byte("\n\n"),
offset: 3,
Expand All @@ -127,14 +130,14 @@ func TestAtOffset(t *testing.T) {
for _, tt := range cases {
t.Run("", func(t *testing.T) {
nls := getNewlines(tt.data)
gotLineNumber, gotLineStart, gotLineEnd := nls.atOffset(tt.offset)
gotLineNumber := nls.atOffset(tt.offset)
if gotLineNumber != tt.lineNumber {
t.Fatalf("expected line number %d, got %d", tt.lineNumber, gotLineNumber)
}
if gotLineStart != tt.lineStart {
if gotLineStart := nls.lineStart(gotLineNumber); gotLineStart != tt.lineStart {
t.Fatalf("expected line start %d, got %d", tt.lineStart, gotLineStart)
}
if gotLineEnd != tt.lineEnd {
if gotLineEnd := nls.lineEnd(gotLineNumber, false); gotLineEnd != tt.lineEnd {
t.Fatalf("expected line end %d, got %d", tt.lineEnd, gotLineEnd)
}
})
Expand All @@ -150,11 +153,11 @@ func TestLineBounds(t *testing.T) {
}{{
data: []byte("0.2.4.\n7.9.11.\n"),
lineNumber: 1,
start: 0, end: 6,
start: 0, end: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
lineNumber: 2,
start: 7, end: 14,
start: 7, end: 15,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
lineNumber: 0,
Expand All @@ -170,11 +173,11 @@ func TestLineBounds(t *testing.T) {
}, {
data: []byte("\n\n"),
lineNumber: 1,
start: 0, end: 0,
start: 0, end: 1,
}, {
data: []byte("\n\n"),
lineNumber: 2,
start: 1, end: 1,
start: 1, end: 2,
}, {
data: []byte("\n\n"),
lineNumber: 3,
Expand All @@ -184,10 +187,11 @@ func TestLineBounds(t *testing.T) {
for _, tt := range cases {
t.Run("", func(t *testing.T) {
nls := getNewlines(tt.data)
gotStart, gotEnd := nls.lineBounds(tt.lineNumber)
gotStart := nls.lineStart(tt.lineNumber)
if gotStart != tt.start {
t.Fatalf("expected line start %d, got %d", tt.start, gotStart)
}
gotEnd := nls.lineEnd(tt.lineNumber, false)
if gotEnd != tt.end {
t.Fatalf("expected line end %d, got %d", tt.end, gotEnd)
}
Expand Down
8 changes: 4 additions & 4 deletions index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func TestNewlines(t *testing.T) {
want := []FileMatch{{
FileName: "filename",
ChunkMatches: []ChunkMatch{{
Content: []byte("line2"),
Content: []byte("line2\n"),
ContentStart: Location{
ByteOffset: 6,
LineNumber: 2,
Expand Down Expand Up @@ -2452,7 +2452,7 @@ func TestIOStats(t *testing.T) {
res := searchForTest(t, b, q)

// 4096 (content) + 2 (overhead: newlines or doc sections)
if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
if got, want := res.Stats.ContentBytesLoaded, int64(4100); got != want {
t.Errorf("got content I/O %d, want %d", got, want)
}

Expand Down Expand Up @@ -3243,7 +3243,7 @@ func TestLineAnd(t *testing.T) {
}
t.Run("LineMatches", func(t *testing.T) {
res := searchForTest(t, b, &q)
wantRegexpCount := 1
wantRegexpCount := 2
if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
}
Expand All @@ -3254,7 +3254,7 @@ func TestLineAnd(t *testing.T) {

t.Run("ChunkMatches", func(t *testing.T) {
res := searchForTest(t, b, &q, chunkOpts)
wantRegexpCount := 1
wantRegexpCount := 2 // TODO: justify this change
if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
}
Expand Down
4 changes: 3 additions & 1 deletion matchtree.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,11 +694,13 @@ func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matc
lines := make([]lineRange, 0, len(t.children[fewestChildren].(*substrMatchTree).current))
prev := -1
for _, candidate := range t.children[fewestChildren].(*substrMatchTree).current {
line, byteStart, byteEnd := cp.newlines().atOffset(candidate.byteOffset)
line := cp.newlines().atOffset(candidate.byteOffset)
if line == prev {
continue
}
prev = line
byteStart := int(cp.newlines().lineStart(line))
byteEnd := int(cp.newlines().lineEnd(line, false))
lines = append(lines, lineRange{byteStart, byteEnd})
}

Expand Down

0 comments on commit b1aa903

Please sign in to comment.