diff --git a/contentprovider.go b/contentprovider.go index a6b711c50..c66b1f471 100644 --- a/contentprovider.go +++ b/contentprovider.go @@ -342,13 +342,7 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte for _, cm := range chunk.candidates { startOffset := cm.byteOffset endOffset := cm.byteOffset + cm.byteMatchSz - startLine := newlines.atOffset(startOffset) - endLine := newlines.atOffset( - // We want the line of the last byte in the match, not the first byte outside of the match. - // For a zero-length match, endOffset-1 could be before match start, so fall back to the - // byte after the match (as we do for startLine), not before. - max(startOffset, max(endOffset, 1)-1 /* prevent underflow */), - ) + startLine, endLine := newlines.offsetRangeToLineRange(startOffset, endOffset) ranges = append(ranges, Range{ Start: Location{ @@ -406,8 +400,7 @@ func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines in for _, m := range ms { startOffset := m.byteOffset endOffset := m.byteOffset + m.byteMatchSz - firstLine := newlines.atOffset(startOffset) - lastLine := newlines.atOffset(max(startOffset, endOffset-1)) + firstLine, lastLine := newlines.offsetRangeToLineRange(startOffset, endOffset) if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines { // If a new chunk created with the current candidateMatch would @@ -502,6 +495,17 @@ func (nls newlines) lineStart(lineNumber int) uint32 { } } +// offsetRangeToLineRange returns the range of lines that fully contains the given byte range. +// The inputs are 0-based byte offsets into the file representing the half-open (end-exclusive) range [startOffset, endOffset). +// The return values are 1-based line numbers representing the (inclusive) range [startLine, endLine]; for an empty range both are the line of startOffset. 
+func (nls newlines) offsetRangeToLineRange(startOffset, endOffset uint32) (startLine, endLine int) { + startLine = nls.atOffset(startOffset) + endLine = nls.atOffset( + max(startOffset, max(endOffset, 1)-1), // last byte inside the range; clamp to startOffset for empty ranges and avoid uint32 underflow when endOffset is 0 + ) + return startLine, endLine +} + // getLines returns a slice of data containing the lines [low, high). // low is 1-based and inclusive. high is 1-based and exclusive. func (nls newlines) getLines(data []byte, low, high int) []byte { diff --git a/index_test.go b/index_test.go index 5a2b0091a..1a0653df9 100644 --- a/index_test.go +++ b/index_test.go @@ -201,8 +201,8 @@ func (s *memSeeker) Size() (uint32, error) { func TestNewlines(t *testing.T) { b := testIndexBuilder(t, nil, + // -----------------------------------------012345-678901-234 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) - // -------------------------------------------012345-678901-234 t.Run("LineMatches", func(t *testing.T) { sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})