Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
Search: expect trailing newlines in chunk matches (#61247)
Browse files Browse the repository at this point in the history
This:
1) Bumps Zoekt to include sourcegraph/zoekt#747
2) Updates all the consumers of our APIs to trim the trailing newline before splitting
3) Updates searcher to also include trailing newlines in chunk matches
  • Loading branch information
camdencheek authored Apr 23, 2024
1 parent 8cdba45 commit 8edfc0f
Show file tree
Hide file tree
Showing 12 changed files with 72 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ function chunkToMatchGroup(chunk: ChunkMatch): MatchGroup {
endLine: range.end.line,
endCharacter: range.end.column,
}))
const plaintextLines = chunk.content.split(/\r?\n/)
const plaintextLines = chunk.content.replace(/\r?\n$/, '').split(/\r?\n/)
return {
plaintextLines,
highlightedHTMLRows: undefined, // populated lazily
Expand Down
2 changes: 1 addition & 1 deletion client/web-sveltekit/src/lib/search/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export function chunkToMatchGroup(chunk: ChunkMatch): MatchGroup {
endLine: range.end.line,
endCharacter: range.end.column,
}))
const plaintextLines = chunk.content.split(/\r?\n/)
const plaintextLines = chunk.content.replace(/\r?\n$/, '').split(/\r?\n/)
return {
plaintextLines,
highlightedHTMLRows: undefined, // populated lazily
Expand Down
21 changes: 10 additions & 11 deletions cmd/searcher/internal/search/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,13 @@ func chunksToMatches(buf []byte, chunks []rangeChunk, contextLines int32) []prot
func extendRangeToLines(inputRange protocol.Range, buf []byte) protocol.Range {
firstLineStart := lineStart(buf, inputRange.Start.Offset)
lastLineStart := lineStart(buf, inputRange.End.Offset)
lastLineEnd := lineEnd(buf, inputRange.End.Offset)
lastLineEnd := lineEnd(buf,
// We want the end of the line containing the last byte of the
// match, not the first byte after the match. In the case of a
// zero-width match between lines, prefer the line after rather
// than the line before (like we do for lineStart).
max(inputRange.End.Offset, max(inputRange.End.Offset, 1)-1 /* prevent underflow */),
)

return protocol.Range{
Start: protocol.Location{
Expand Down Expand Up @@ -113,12 +119,8 @@ func addContextLines(inputRange protocol.Range, buf []byte, contextLines int32)
precedingLinesAdded += 1
}

rest := buf[lastLineEnd:]
if bytes.HasPrefix(rest, []byte("\n")) && len(rest) > 1 {
lastLineEnd = lineEnd(buf, lastLineEnd+1)
succeedingLinesAdded += 1
} else if bytes.HasPrefix(rest, []byte("\r\n")) && len(rest) > 2 {
lastLineEnd = lineEnd(buf, lastLineEnd+2)
if int(lastLineEnd) < len(buf) {
lastLineEnd = lineEnd(buf, lastLineEnd)
succeedingLinesAdded += 1
}
}
Expand Down Expand Up @@ -150,10 +152,7 @@ func lineStart(buf []byte, offset int32) int32 {
func lineEnd(buf []byte, offset int32) int32 {
end := int32(len(buf))
if loc := bytes.IndexByte(buf[offset:], '\n'); loc >= 0 {
end = int32(loc) + offset
if bytes.HasSuffix(buf[:end], []byte("\r")) {
end -= 1
}
end = int32(loc) + offset + 1
}
return end
}
Expand Down
28 changes: 14 additions & 14 deletions cmd/searcher/internal/search/chunk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,61 +172,61 @@ func Test_addContext(t *testing.T) {
"\n",
0,
r(l(0, 0, 0), l(0, 0, 0)),
"",
"\n",
},
{
"\n",
1,
r(l(0, 0, 0), l(0, 0, 0)),
"",
"\n",
},
{
"\n\n\n",
0,
r(l(1, 1, 0), l(1, 1, 0)),
"",
"\n",
},
{
"\n\n\n\n",
1,
r(l(1, 1, 0), l(1, 1, 0)),
"\n\n",
"\n\n\n",
},
{
"\n\n\n\n",
2,
r(l(1, 1, 0), l(1, 1, 0)),
"\n\n\n",
"\n\n\n\n",
},
{
"abc\ndef\nghi\n",
0,
r(l(1, 0, 1), l(1, 0, 1)),
"abc",
"abc\n",
},
{
"abc\ndef\nghi\n",
1,
r(l(1, 0, 1), l(1, 0, 1)),
"abc\ndef",
"abc\ndef\n",
},
{
"abc\ndef\nghi\n",
2,
r(l(1, 0, 1), l(1, 0, 1)),
"abc\ndef\nghi",
"abc\ndef\nghi\n",
},
{
"abc\ndef\nghi",
0,
r(l(1, 0, 1), l(1, 0, 1)),
"abc",
"abc\n",
},
{
"abc\ndef\nghi",
1,
r(l(1, 0, 1), l(1, 0, 1)),
"abc\ndef",
"abc\ndef\n",
},
{
"abc\ndef\nghi",
Expand Down Expand Up @@ -256,7 +256,7 @@ func Test_addContext(t *testing.T) {
"abc\r\ndef\r\nghi\r\n",
1,
r(l(1, 0, 1), l(2, 0, 2)),
"abc\r\ndef",
"abc\r\ndef\r\n",
},
{
"abc\r\ndef\r\nghi",
Expand All @@ -268,19 +268,19 @@ func Test_addContext(t *testing.T) {
"\r\n",
0,
r(l(0, 0, 0), l(0, 0, 0)),
"",
"\r\n",
},
{
"\r\n",
1,
r(l(0, 0, 0), l(0, 0, 0)),
"",
"\r\n",
},
{
"abc\nd\xE2\x9D\x89f\nghi",
0,
r(l(4, 1, 0), l(5, 1, 1)),
"d\xE2\x9D\x89f",
"d\xE2\x9D\x89f\n",
},
{
"abc\nd\xE2\x9D\x89f\nghi",
Expand Down
7 changes: 7 additions & 0 deletions cmd/searcher/internal/search/hybrid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,14 @@ Hello world example in go`, typeFile},
Want: `
added.md:1:1:
hello world I am added
// No newline at end of chunk
changed.go:6:6:
fmt.Println("Hello world")
unchanged.md:1:1:
# Hello World
unchanged.md:3:3:
Hello world example in go
// No newline at end of chunk
`,
}, {
Name: "added",
Expand All @@ -171,6 +173,7 @@ Hello world example in go
Want: `
added.md:1:1:
hello world I am added
// No newline at end of chunk
`,
}, {
Name: "example",
Expand All @@ -180,6 +183,7 @@ hello world I am added
Want: `
unchanged.md:3:3:
Hello world example in go
// No newline at end of chunk
`,
}, {
Name: "boolean query",
Expand All @@ -199,6 +203,7 @@ Hello world example in go
Want: `
added.md:1:1:
hello world I am added
// No newline at end of chunk
changed.go:1:1:
package main
changed.go:6:6:
Expand All @@ -207,6 +212,7 @@ unchanged.md:1:1:
# Hello World
unchanged.md:3:3:
Hello world example in go
// No newline at end of chunk
`,
}, {
Name: "negated-pattern-example",
Expand Down Expand Up @@ -268,6 +274,7 @@ unchanged.md
changed.go
unchanged.md:3:3:
Hello world example in go
// No newline at end of chunk
`,
}, {
Name: "negated-pattern-path",
Expand Down
4 changes: 2 additions & 2 deletions cmd/searcher/internal/search/search_structural_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,14 +500,14 @@ func bar() {
expected := []protocol.FileMatch{{
Path: "main.go",
ChunkMatches: []protocol.ChunkMatch{{
Content: "func foo() {\n fmt.Println(\"foo\")\n}",
Content: "func foo() {\n fmt.Println(\"foo\")\n}\n",
ContentStart: protocol.Location{Offset: 1, Line: 1},
Ranges: []protocol.Range{{
Start: protocol.Location{Offset: 12, Line: 1, Column: 11},
End: protocol.Location{Offset: 38, Line: 3, Column: 1},
}},
}, {
Content: "func bar() {\n fmt.Println(\"bar\")\n}",
Content: "func bar() {\n fmt.Println(\"bar\")\n}\n",
ContentStart: protocol.Location{Offset: 40, Line: 5},
Ranges: []protocol.Range{{
Start: protocol.Location{Offset: 51, Line: 5, Column: 11},
Expand Down
38 changes: 29 additions & 9 deletions cmd/searcher/internal/search/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ func main() {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "world"}, IsCaseSensitive: true},
want: autogold.Expect(`README.md:3:3:
Hello world example in go
// No newline at end of chunk
main.go:6:6:
fmt.Println("Hello world")
`),
Expand All @@ -95,6 +96,7 @@ fmt.Println("Hello world")
want: autogold.Expect(`README.md:2:3:
Hello world example in go
// No newline at end of chunk
main.go:5:7:
func main() {
fmt.Println("Hello world")
Expand All @@ -107,6 +109,7 @@ fmt.Println("Hello world")
# Hello World
Hello world example in go
// No newline at end of chunk
main.go:4:7:
func main() {
Expand All @@ -120,6 +123,7 @@ fmt.Println("Hello world")
# Hello World
Hello world example in go
// No newline at end of chunk
main.go:1:7:
package main
Expand All @@ -135,6 +139,7 @@ fmt.Println("Hello world")
# Hello World
README.md:3:3:
Hello world example in go
// No newline at end of chunk
main.go:6:6:
fmt.Println("Hello world")
`),
Expand Down Expand Up @@ -169,6 +174,7 @@ fmt.Println("Hello world")
# Hello World
README.md:3:3:
Hello world example in go
// No newline at end of chunk
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: ""}, ExcludeLangs: []string{"Markdown"}},
Expand All @@ -185,10 +191,14 @@ symlink
# Hello World
README.md:3:3:
Hello world example in go
// No newline at end of chunk
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "w"}, IncludePaths: []string{`\.(md|txt)$`, `\.txt$`}},
want: autogold.Expect("abc.txt:1:1:\nw\n"),
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "w"}, IncludePaths: []string{`\.(md|txt)$`, `\.txt$`}},
want: autogold.Expect(`abc.txt:1:1:
w
// No newline at end of chunk
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "world"}, ExcludePaths: "README\\.md"},
want: autogold.Expect(`main.go:6:6:
Expand All @@ -200,13 +210,15 @@ fmt.Println("Hello world")
# Hello World
README.md:3:3:
Hello world example in go
// No newline at end of chunk
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "w"}, IncludePaths: []string{"\\.(md|txt)", "README"}},
want: autogold.Expect(`README.md:1:1:
# Hello World
README.md:3:3:
Hello world example in go
// No newline at end of chunk
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "world"}, IncludePaths: []string{`\.(MD|go)$`}, PathPatternsAreCaseSensitive: true},
Expand Down Expand Up @@ -290,15 +302,15 @@ func main() {
# Hello World
Hello world example in go
main.go:1:8:
// No newline at end of chunk
main.go:1:7:
package main
import "fmt"
func main() {
fmt.Println("Hello world")
}
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "^$", IsRegExp: true}},
Expand All @@ -310,8 +322,10 @@ main.go:4:4:
main.go:8:8:
// No newline at end of chunk
milton.png:1:1:
// No newline at end of chunk
`),
}, {
arg: protocol.PatternInfo{
Expand All @@ -324,6 +338,7 @@ milton.png:1:1:
},
want: autogold.Expect(`file++.plus:1:1:
filename contains regex metachars
// No newline at end of chunk
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "World", IsNegated: true}},
Expand Down Expand Up @@ -360,17 +375,18 @@ symlink
`),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "abc"}, PatternMatchesPath: true, PatternMatchesContent: true},
want: autogold.Expect("abc.txt\nsymlink:1:1:\nabc.txt\n"),
want: autogold.Expect("abc.txt\nsymlink:1:1:\nabc.txt\n// No newline at end of chunk\n"),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "abc"}, PatternMatchesPath: false, PatternMatchesContent: true},
want: autogold.Expect("symlink:1:1:\nabc.txt\n"),
want: autogold.Expect("symlink:1:1:\nabc.txt\n// No newline at end of chunk\n"),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "abc"}, PatternMatchesPath: true, PatternMatchesContent: false},
want: autogold.Expect("abc.txt\n"),
}, {
arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "utf8"}, PatternMatchesPath: false, PatternMatchesContent: true},
want: autogold.Expect(`nonutf8.txt:1:1:
file contains invalid utf8 � characters
// No newline at end of chunk
`),
}}

Expand Down Expand Up @@ -740,13 +756,17 @@ func toString(m []protocol.FileMatch) string {
for _, cm := range f.ChunkMatches {
buf.WriteString(f.Path)
buf.WriteByte(':')
buf.WriteString(strconv.Itoa(int(cm.ContentStart.Line) + 1))
firstLine := int(cm.ContentStart.Line) + 1
lastLine := firstLine + strings.Count(strings.TrimSuffix(cm.Content, "\n"), "\n")
buf.WriteString(strconv.Itoa(firstLine))
buf.WriteByte(':')
buf.WriteString(strconv.Itoa(int(cm.ContentStart.Line) + strings.Count(cm.Content, "\n") + 1))
buf.WriteString(strconv.Itoa(lastLine))
buf.WriteByte(':')
buf.WriteByte('\n')
buf.WriteString(cm.Content)
buf.WriteByte('\n')
if !strings.HasSuffix(cm.Content, "\n") {
buf.WriteString("\n// No newline at end of chunk\n")
}
}
}
return buf.String()
Expand Down
Loading

0 comments on commit 8edfc0f

Please sign in to comment.