Always include trailing newline #747

Merged (11 commits) on Apr 18, 2024
Changes from all commits
9 changes: 7 additions & 2 deletions api.go
@@ -141,6 +141,7 @@ type ChunkMatch struct
DebugScore string

// Content is a contiguous range of complete lines that fully contains Ranges.
// Lines will always include their terminating newline (if it exists).
Content []byte

// Ranges is a set of matching ranges within this chunk. Each range is relative
@@ -224,8 +225,12 @@ func (l *Location) sizeBytes() uint64 {
// LineMatch holds the matches within a single line in a file.
type LineMatch struct {
// The line in which a match was found.
Line []byte
LineStart int
Line []byte
// The byte offset of the first byte of the line.
LineStart int
// The byte offset of the first byte past the end of the line.
// This is usually the byte after the terminating newline, but can also be
// the end of the file if there is no terminating newline
LineEnd int
LineNumber int

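To make the new field semantics concrete, here is a small illustrative sketch (not part of this diff) of what LineStart, LineEnd, and Line contain for a match on the second line of a file that ends in a newline, consistent with the Content/Line documentation above:

// Illustrative sketch only; not code from this PR.
package main

import (
	"bytes"
	"fmt"
)

func main() {
	data := []byte("foo\nbar\n")

	// Suppose a match landed on the second line ("bar").
	lineStart := bytes.IndexByte(data, '\n') + 1 // 4: first byte of line 2
	lineEnd := len(data)                         // 8: byte past the terminating '\n' (here, end of file)
	line := data[lineStart:lineEnd]              // "bar\n": the line keeps its trailing newline

	fmt.Printf("LineStart=%d LineEnd=%d Line=%q\n", lineStart, lineEnd, line)
}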
95 changes: 37 additions & 58 deletions contentprovider.go
@@ -243,15 +243,17 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin
var result []LineMatch
for len(ms) > 0 {
m := ms[0]
num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset)
num := p.newlines().atOffset(m.byteOffset)
lineStart := int(p.newlines().lineStart(num))
nextLineStart := int(p.newlines().lineStart(num + 1))

var lineCands []*candidateMatch

endMatch := m.byteOffset + m.byteMatchSz

for len(ms) > 0 {
m := ms[0]
if int(m.byteOffset) <= lineEnd {
if int(m.byteOffset) < nextLineStart {
endMatch = m.byteOffset + m.byteMatchSz
lineCands = append(lineCands, m)
ms = ms[1:]
@@ -264,7 +266,7 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin
log.Panicf(
"%s %v infinite loop: num %d start,end %d,%d, offset %d",
p.id.fileName(p.idx), p.id.metaData,
num, lineStart, lineEnd,
num, lineStart, nextLineStart,
m.byteOffset)
}

@@ -273,22 +275,22 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin
// Due to merging matches, we may have a match that
// crosses a line boundary. Prevent confusion by
// taking lines until we pass the last match
for lineEnd < len(data) && endMatch > uint32(lineEnd) {
next := bytes.IndexByte(data[lineEnd+1:], '\n')
for nextLineStart < len(data) && endMatch > uint32(nextLineStart) {
next := bytes.IndexByte(data[nextLineStart:], '\n')
if next == -1 {
lineEnd = len(data)
nextLineStart = len(data)
} else {
// TODO(hanwen): test that checks "+1" part here.
lineEnd += next + 1
nextLineStart += next + 1
}
}

finalMatch := LineMatch{
LineStart: lineStart,
LineEnd: lineEnd,
LineEnd: nextLineStart,
LineNumber: num,
}
finalMatch.Line = data[lineStart:lineEnd]
finalMatch.Line = data[lineStart:nextLineStart]

if numContextLines > 0 {
finalMatch.Before = p.newlines().getLines(data, num-numContextLines, num)
@@ -340,19 +342,18 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
for _, cm := range chunk.candidates {
startOffset := cm.byteOffset
endOffset := cm.byteOffset + cm.byteMatchSz
startLine, startLineOffset, _ := newlines.atOffset(startOffset)
endLine, endLineOffset, _ := newlines.atOffset(endOffset)
startLine, endLine := newlines.offsetRangeToLineRange(startOffset, endOffset)

ranges = append(ranges, Range{
Start: Location{
ByteOffset: startOffset,
LineNumber: uint32(startLine),
Column: columnHelper.get(startLineOffset, startOffset),
Column: columnHelper.get(int(newlines.lineStart(startLine)), startOffset),
},
End: Location{
ByteOffset: endOffset,
LineNumber: uint32(endLine),
Column: columnHelper.get(endLineOffset, endOffset),
Column: columnHelper.get(int(newlines.lineStart(endLine)), endOffset),
},
})
}
@@ -361,7 +362,7 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
if firstLineNumber < 1 {
firstLineNumber = 1
}
firstLineStart, _ := newlines.lineBounds(firstLineNumber)
firstLineStart := newlines.lineStart(firstLineNumber)

chunkMatches = append(chunkMatches, ChunkMatch{
Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1),
@@ -399,8 +400,7 @@ func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines in
for _, m := range ms {
startOffset := m.byteOffset
endOffset := m.byteOffset + m.byteMatchSz
firstLine, _, _ := newlines.atOffset(startOffset)
lastLine, _, _ := newlines.atOffset(endOffset)
firstLine, lastLine := newlines.offsetRangeToLineRange(startOffset, endOffset)

if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines {
// If a new chunk created with the current candidateMatch would
@@ -471,45 +471,39 @@ type newlines struct {
}

// atOffset returns the line containing the offset. If the offset lands on
// the newline ending line M, we return M. The line is characterized
// by its linenumber (base-1, byte index of line start, byte index of
// line end). The line end is the index of a newline, or the filesize
// (if matching the last line of the file.)
func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) {
// the newline ending line M, we return M.
func (nls newlines) atOffset(offset uint32) (lineNumber int) {
idx := sort.Search(len(nls.locs), func(n int) bool {
return nls.locs[n] >= offset
})

start, end := nls.lineBounds(idx + 1)
return idx + 1, int(start), int(end)
return idx + 1
}

// lineBounds returns the byte offsets of the start and end of the 1-based
// lineNumber. The end offset is exclusive and will not contain the line-ending
// newline. If the line number is out of range of the lines in the file, start
// and end will be clamped to [0,fileSize].
func (nls newlines) lineBounds(lineNumber int) (start, end uint32) {
Comment on lines -487 to -491 (Member, Author):
I split lineBounds into lineStart and lineEnd because 1) there were often places where we only needed one or the other, and 2) it let me add an option to exclude the trailing newline for lineEnd more easily.
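As a sketch of what that split looks like from the caller's side (mirroring how the updated tests below derive a line's end), the old lineBounds pair can be recovered from lineStart alone:

// Sketch, not code from this PR: a line now ends where the next line starts,
// so lineBounds(n) is equivalent to a pair of lineStart calls.
func lineBoundsFromStarts(nls newlines, lineNumber int) (start, end uint32) {
	start = nls.lineStart(lineNumber)
	end = nls.lineStart(lineNumber + 1) // includes the trailing '\n'; clamps to fileSize on the last line
	return start, end
}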

// lineStart returns the byte offset of the beginning of the given line.
// lineNumber is 1-based. If lineNumber is out of range of the lines in the
// file, the return value will be clamped to [0,fileSize].
func (nls newlines) lineStart(lineNumber int) uint32 {
// nls.locs[0] + 1 is the start of the 2nd line of data.
startIdx := lineNumber - 2
endIdx := lineNumber - 1

if startIdx < 0 {
start = 0
return 0
} else if startIdx >= len(nls.locs) {
start = nls.fileSize
return nls.fileSize
} else {
start = nls.locs[startIdx] + 1
}

if endIdx < 0 {
end = 0
} else if endIdx >= len(nls.locs) {
end = nls.fileSize
} else {
end = nls.locs[endIdx]
return nls.locs[startIdx] + 1
}
}

return start, end
// offsetRangeToLineRange returns range of lines that fully contains the given byte range.
// The inputs are 0-based byte offsets into the file representing the (exclusive) range [startOffset, endOffset).
// The return values are 1-based line numbers representing the (inclusive) range [startLine, endLine].
func (nls newlines) offsetRangeToLineRange(startOffset, endOffset uint32) (startLine, endLine int) {
startLine = nls.atOffset(startOffset)
endLine = nls.atOffset(
max(startOffset, max(endOffset, 1)-1), // clamp endOffset and prevent underflow
)
return startLine, endLine
}

// getLines returns a slice of data containing the lines [low, high).
@@ -519,22 +513,7 @@ func (nls newlines) getLines(data []byte, low, high int) []byte {
return nil
}

lowStart, _ := nls.lineBounds(low)
_, highEnd := nls.lineBounds(high - 1)

// Drop any trailing newline. Editors do not treat a trailing newline as
// the start of a new line, so we should not either. lineBounds clamps to
// len(data) when an out-of-bounds line is requested.
//
// As an example, if we request lines 1-5 from a file with contents
// `one\ntwo\nthree\n`, we should return `one\ntwo\nthree` because those are
// the three "lines" in the file, separated by newlines.
if highEnd == uint32(len(data)) && bytes.HasSuffix(data, []byte{'\n'}) {
highEnd = highEnd - 1
lowStart = min(lowStart, highEnd)
}
Comment on lines -525 to -535 (Member, Author):
This is the culprit of the panics


return data[lowStart:highEnd]
return data[nls.lineStart(low):nls.lineStart(high)]
}

const (
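A short worked example (not part of the diff) of the reworked getLines and the new offsetRangeToLineRange, reusing the file from the comment that was removed above:

// Sketch only; getNewlines is the existing test helper from contentprovider_test.go.
data := []byte("one\ntwo\nthree\n")
nls := getNewlines(data) // newline offsets: 3, 7, 13; fileSize: 14

// getLines now slices between line starts, so the trailing newline is kept:
//   before this PR: getLines(data, 1, 5) == "one\ntwo\nthree"
//   after this PR:  getLines(data, 1, 5) == "one\ntwo\nthree\n"
lines := nls.getLines(data, 1, 5)

// A match covering bytes [4, 9) ("two\nt") spans lines 2 through 3 (1-based, inclusive).
startLine, endLine := nls.offsetRangeToLineRange(4, 9)
_, _, _ = lines, startLine, endLine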
46 changes: 25 additions & 21 deletions contentprovider_test.go
@@ -34,8 +34,11 @@ func TestGetLines(t *testing.T) {
for _, content := range contents {
t.Run("", func(t *testing.T) {
newLines := getNewlines(content)
// Trim the last newline before splitting because a trailing newline does not constitute a new line
lines := bytes.Split(bytes.TrimSuffix(content, []byte{'\n'}), []byte{'\n'})
lines := bytes.SplitAfter(content, []byte{'\n'})
if len(lines) > 0 && len(lines[len(lines)-1]) == 0 {
// A trailing newline does not delimit an empty line at the end of a file
lines = lines[:len(lines)-1]
}
wantGetLines := func(low, high int) []byte {
low--
high--
@@ -51,7 +54,7 @@ func TestGetLines(t *testing.T) {
if high > len(lines) {
high = len(lines)
}
return bytes.Join(lines[low:high], []byte{'\n'})
return bytes.Join(lines[low:high], nil)
}

for low := -1; low <= len(lines)+2; low++ {
@@ -72,32 +75,32 @@ func TestAtOffset(t *testing.T) {
data []byte
offset uint32
lineNumber int
lineStart int
lineEnd int
lineStart uint32
lineEnd uint32
}{{
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 0,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 6,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 2,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 2,
lineNumber: 1, lineStart: 0, lineEnd: 6,
lineNumber: 1, lineStart: 0, lineEnd: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 7,
lineNumber: 2, lineStart: 7, lineEnd: 14,
lineNumber: 2, lineStart: 7, lineEnd: 15,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 11,
lineNumber: 2, lineStart: 7, lineEnd: 14,
lineNumber: 2, lineStart: 7, lineEnd: 15,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
offset: 15,
@@ -109,11 +112,11 @@ func TestAtOffset(t *testing.T) {
}, {
data: []byte("\n\n"),
offset: 0,
lineNumber: 1, lineStart: 0, lineEnd: 0,
lineNumber: 1, lineStart: 0, lineEnd: 1,
}, {
data: []byte("\n\n"),
offset: 1,
lineNumber: 2, lineStart: 1, lineEnd: 1,
lineNumber: 2, lineStart: 1, lineEnd: 2,
}, {
data: []byte("\n\n"),
offset: 3,
@@ -127,14 +130,14 @@ func TestAtOffset(t *testing.T) {
for _, tt := range cases {
t.Run("", func(t *testing.T) {
nls := getNewlines(tt.data)
gotLineNumber, gotLineStart, gotLineEnd := nls.atOffset(tt.offset)
gotLineNumber := nls.atOffset(tt.offset)
if gotLineNumber != tt.lineNumber {
t.Fatalf("expected line number %d, got %d", tt.lineNumber, gotLineNumber)
}
if gotLineStart != tt.lineStart {
if gotLineStart := nls.lineStart(gotLineNumber); gotLineStart != tt.lineStart {
t.Fatalf("expected line start %d, got %d", tt.lineStart, gotLineStart)
}
if gotLineEnd != tt.lineEnd {
if gotLineEnd := nls.lineStart(gotLineNumber + 1); gotLineEnd != tt.lineEnd {
t.Fatalf("expected line end %d, got %d", tt.lineEnd, gotLineEnd)
}
})
@@ -150,11 +153,11 @@ func TestLineBounds(t *testing.T) {
}{{
data: []byte("0.2.4.\n7.9.11.\n"),
lineNumber: 1,
start: 0, end: 6,
start: 0, end: 7,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
lineNumber: 2,
start: 7, end: 14,
start: 7, end: 15,
}, {
data: []byte("0.2.4.\n7.9.11.\n"),
lineNumber: 0,
@@ -170,11 +173,11 @@ func TestLineBounds(t *testing.T) {
}, {
data: []byte("\n\n"),
lineNumber: 1,
start: 0, end: 0,
start: 0, end: 1,
}, {
data: []byte("\n\n"),
lineNumber: 2,
start: 1, end: 1,
start: 1, end: 2,
}, {
data: []byte("\n\n"),
lineNumber: 3,
@@ -184,10 +187,11 @@ func TestLineBounds(t *testing.T) {
for _, tt := range cases {
t.Run("", func(t *testing.T) {
nls := getNewlines(tt.data)
gotStart, gotEnd := nls.lineBounds(tt.lineNumber)
gotStart := nls.lineStart(tt.lineNumber)
if gotStart != tt.start {
t.Fatalf("expected line start %d, got %d", tt.start, gotStart)
}
gotEnd := nls.lineStart(tt.lineNumber + 1)
if gotEnd != tt.end {
t.Fatalf("expected line end %d, got %d", tt.end, gotEnd)
}
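One note on the bytes.SplitAfter call in the reworked TestGetLines above: SplitAfter keeps each newline with its line and yields a trailing empty element when the content ends in a newline, which is why the test trims that element (illustration only, not part of the diff):

pieces := bytes.SplitAfter([]byte("one\ntwo\n"), []byte{'\n'})
// pieces == [][]byte{[]byte("one\n"), []byte("two\n"), []byte("")}
// The final empty element is not a real line, so the test drops it before
// joining the expected lines with bytes.Join(lines[low:high], nil).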