diff --git a/client/branded/src/search-ui/components/FileContentSearchResult.tsx b/client/branded/src/search-ui/components/FileContentSearchResult.tsx index 4a1702ed1f40..830ced7c2987 100644 --- a/client/branded/src/search-ui/components/FileContentSearchResult.tsx +++ b/client/branded/src/search-ui/components/FileContentSearchResult.tsx @@ -299,7 +299,7 @@ function chunkToMatchGroup(chunk: ChunkMatch): MatchGroup { endLine: range.end.line, endCharacter: range.end.column, })) - const plaintextLines = chunk.content.split(/\r?\n/) + const plaintextLines = chunk.content.replace(/\r?\n$/, '').split(/\r?\n/) return { plaintextLines, highlightedHTMLRows: undefined, // populated lazily diff --git a/client/web-sveltekit/src/lib/search/utils.ts b/client/web-sveltekit/src/lib/search/utils.ts index 420d72f11f08..283681c9ed5a 100644 --- a/client/web-sveltekit/src/lib/search/utils.ts +++ b/client/web-sveltekit/src/lib/search/utils.ts @@ -19,7 +19,7 @@ export function chunkToMatchGroup(chunk: ChunkMatch): MatchGroup { endLine: range.end.line, endCharacter: range.end.column, })) - const plaintextLines = chunk.content.split(/\r?\n/) + const plaintextLines = chunk.content.replace(/\r?\n$/, '').split(/\r?\n/) return { plaintextLines, highlightedHTMLRows: undefined, // populated lazily diff --git a/cmd/searcher/internal/search/chunk.go b/cmd/searcher/internal/search/chunk.go index 016fc4bd92e8..e31805fe2e36 100644 --- a/cmd/searcher/internal/search/chunk.go +++ b/cmd/searcher/internal/search/chunk.go @@ -81,7 +81,13 @@ func chunksToMatches(buf []byte, chunks []rangeChunk, contextLines int32) []prot func extendRangeToLines(inputRange protocol.Range, buf []byte) protocol.Range { firstLineStart := lineStart(buf, inputRange.Start.Offset) lastLineStart := lineStart(buf, inputRange.End.Offset) - lastLineEnd := lineEnd(buf, inputRange.End.Offset) + lastLineEnd := lineEnd(buf, + // We want the end of the line containing the last byte of the + // match, not the first byte after the match. 
In the case of a + // zero-width match between lines, prefer the line after rather + // than the line before (like we do for lineStart). + max(inputRange.Start.Offset, max(inputRange.End.Offset, 1)-1 /* prevent underflow */), + ) return protocol.Range{ Start: protocol.Location{ @@ -113,12 +119,8 @@ func addContextLines(inputRange protocol.Range, buf []byte, contextLines int32) precedingLinesAdded += 1 } - rest := buf[lastLineEnd:] - if bytes.HasPrefix(rest, []byte("\n")) && len(rest) > 1 { - lastLineEnd = lineEnd(buf, lastLineEnd+1) - succeedingLinesAdded += 1 - } else if bytes.HasPrefix(rest, []byte("\r\n")) && len(rest) > 2 { - lastLineEnd = lineEnd(buf, lastLineEnd+2) + if int(lastLineEnd) < len(buf) { + lastLineEnd = lineEnd(buf, lastLineEnd) succeedingLinesAdded += 1 } } @@ -150,10 +152,7 @@ func lineStart(buf []byte, offset int32) int32 { func lineEnd(buf []byte, offset int32) int32 { end := int32(len(buf)) if loc := bytes.IndexByte(buf[offset:], '\n'); loc >= 0 { - end = int32(loc) + offset - if bytes.HasSuffix(buf[:end], []byte("\r")) { - end -= 1 - } + end = int32(loc) + offset + 1 } return end } diff --git a/cmd/searcher/internal/search/chunk_test.go b/cmd/searcher/internal/search/chunk_test.go index 244f01ea8828..01a3065cfe2f 100644 --- a/cmd/searcher/internal/search/chunk_test.go +++ b/cmd/searcher/internal/search/chunk_test.go @@ -172,61 +172,61 @@ func Test_addContext(t *testing.T) { "\n", 0, r(l(0, 0, 0), l(0, 0, 0)), - "", + "\n", }, { "\n", 1, r(l(0, 0, 0), l(0, 0, 0)), - "", + "\n", }, { "\n\n\n", 0, r(l(1, 1, 0), l(1, 1, 0)), - "", + "\n", }, { "\n\n\n\n", 1, r(l(1, 1, 0), l(1, 1, 0)), - "\n\n", + "\n\n\n", }, { "\n\n\n\n", 2, r(l(1, 1, 0), l(1, 1, 0)), - "\n\n\n", + "\n\n\n\n", }, { "abc\ndef\nghi\n", 0, r(l(1, 0, 1), l(1, 0, 1)), - "abc", + "abc\n", }, { "abc\ndef\nghi\n", 1, r(l(1, 0, 1), l(1, 0, 1)), - "abc\ndef", + "abc\ndef\n", }, { "abc\ndef\nghi\n", 2, r(l(1, 0, 1), l(1, 0, 1)), - "abc\ndef\nghi", + "abc\ndef\nghi\n", }, { "abc\ndef\nghi", 
0, r(l(1, 0, 1), l(1, 0, 1)), - "abc", + "abc\n", }, { "abc\ndef\nghi", 1, r(l(1, 0, 1), l(1, 0, 1)), - "abc\ndef", + "abc\ndef\n", }, { "abc\ndef\nghi", @@ -256,7 +256,7 @@ func Test_addContext(t *testing.T) { "abc\r\ndef\r\nghi\r\n", 1, r(l(1, 0, 1), l(2, 0, 2)), - "abc\r\ndef", + "abc\r\ndef\r\n", }, { "abc\r\ndef\r\nghi", @@ -268,19 +268,19 @@ func Test_addContext(t *testing.T) { "\r\n", 0, r(l(0, 0, 0), l(0, 0, 0)), - "", + "\r\n", }, { "\r\n", 1, r(l(0, 0, 0), l(0, 0, 0)), - "", + "\r\n", }, { "abc\nd\xE2\x9D\x89f\nghi", 0, r(l(4, 1, 0), l(5, 1, 1)), - "d\xE2\x9D\x89f", + "d\xE2\x9D\x89f\n", }, { "abc\nd\xE2\x9D\x89f\nghi", diff --git a/cmd/searcher/internal/search/hybrid_test.go b/cmd/searcher/internal/search/hybrid_test.go index cbffcd9a0093..5d5ba1d40e4f 100644 --- a/cmd/searcher/internal/search/hybrid_test.go +++ b/cmd/searcher/internal/search/hybrid_test.go @@ -155,12 +155,14 @@ Hello world example in go`, typeFile}, Want: ` added.md:1:1: hello world I am added +// No newline at end of chunk changed.go:6:6: fmt.Println("Hello world") unchanged.md:1:1: # Hello World unchanged.md:3:3: Hello world example in go +// No newline at end of chunk `, }, { Name: "added", @@ -171,6 +173,7 @@ Hello world example in go Want: ` added.md:1:1: hello world I am added +// No newline at end of chunk `, }, { Name: "example", @@ -180,6 +183,7 @@ hello world I am added Want: ` unchanged.md:3:3: Hello world example in go +// No newline at end of chunk `, }, { Name: "boolean query", @@ -199,6 +203,7 @@ Hello world example in go Want: ` added.md:1:1: hello world I am added +// No newline at end of chunk changed.go:1:1: package main changed.go:6:6: @@ -207,6 +212,7 @@ unchanged.md:1:1: # Hello World unchanged.md:3:3: Hello world example in go +// No newline at end of chunk `, }, { Name: "negated-pattern-example", @@ -268,6 +274,7 @@ unchanged.md changed.go unchanged.md:3:3: Hello world example in go +// No newline at end of chunk `, }, { Name: "negated-pattern-path", diff --git 
a/cmd/searcher/internal/search/search_structural_test.go b/cmd/searcher/internal/search/search_structural_test.go index 3f855f5ae9d5..f070eb2d1e3a 100644 --- a/cmd/searcher/internal/search/search_structural_test.go +++ b/cmd/searcher/internal/search/search_structural_test.go @@ -500,14 +500,14 @@ func bar() { expected := []protocol.FileMatch{{ Path: "main.go", ChunkMatches: []protocol.ChunkMatch{{ - Content: "func foo() {\n fmt.Println(\"foo\")\n}", + Content: "func foo() {\n fmt.Println(\"foo\")\n}\n", ContentStart: protocol.Location{Offset: 1, Line: 1}, Ranges: []protocol.Range{{ Start: protocol.Location{Offset: 12, Line: 1, Column: 11}, End: protocol.Location{Offset: 38, Line: 3, Column: 1}, }}, }, { - Content: "func bar() {\n fmt.Println(\"bar\")\n}", + Content: "func bar() {\n fmt.Println(\"bar\")\n}\n", ContentStart: protocol.Location{Offset: 40, Line: 5}, Ranges: []protocol.Range{{ Start: protocol.Location{Offset: 51, Line: 5, Column: 11}, diff --git a/cmd/searcher/internal/search/search_test.go b/cmd/searcher/internal/search/search_test.go index d826077dcba8..67498765fca1 100644 --- a/cmd/searcher/internal/search/search_test.go +++ b/cmd/searcher/internal/search/search_test.go @@ -86,6 +86,7 @@ func main() { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "world"}, IsCaseSensitive: true}, want: autogold.Expect(`README.md:3:3: Hello world example in go +// No newline at end of chunk main.go:6:6: fmt.Println("Hello world") `), @@ -95,6 +96,7 @@ fmt.Println("Hello world") want: autogold.Expect(`README.md:2:3: Hello world example in go +// No newline at end of chunk main.go:5:7: func main() { fmt.Println("Hello world") @@ -107,6 +109,7 @@ fmt.Println("Hello world") # Hello World Hello world example in go +// No newline at end of chunk main.go:4:7: func main() { @@ -120,6 +123,7 @@ fmt.Println("Hello world") # Hello World Hello world example in go +// No newline at end of chunk main.go:1:7: package main @@ -135,6 +139,7 @@ fmt.Println("Hello 
world") # Hello World README.md:3:3: Hello world example in go +// No newline at end of chunk main.go:6:6: fmt.Println("Hello world") `), @@ -169,6 +174,7 @@ fmt.Println("Hello world") # Hello World README.md:3:3: Hello world example in go +// No newline at end of chunk `), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: ""}, ExcludeLangs: []string{"Markdown"}}, @@ -185,10 +191,14 @@ symlink # Hello World README.md:3:3: Hello world example in go +// No newline at end of chunk `), }, { - arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "w"}, IncludePaths: []string{`\.(md|txt)$`, `\.txt$`}}, - want: autogold.Expect("abc.txt:1:1:\nw\n"), + arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "w"}, IncludePaths: []string{`\.(md|txt)$`, `\.txt$`}}, + want: autogold.Expect(`abc.txt:1:1: +w +// No newline at end of chunk +`), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "world"}, ExcludePaths: "README\\.md"}, want: autogold.Expect(`main.go:6:6: @@ -200,6 +210,7 @@ fmt.Println("Hello world") # Hello World README.md:3:3: Hello world example in go +// No newline at end of chunk `), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "w"}, IncludePaths: []string{"\\.(md|txt)", "README"}}, @@ -207,6 +218,7 @@ Hello world example in go # Hello World README.md:3:3: Hello world example in go +// No newline at end of chunk `), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "world"}, IncludePaths: []string{`\.(MD|go)$`}, PathPatternsAreCaseSensitive: true}, @@ -290,7 +302,8 @@ func main() { # Hello World Hello world example in go -main.go:1:8: +// No newline at end of chunk +main.go:1:7: package main import "fmt" @@ -298,7 +311,6 @@ import "fmt" func main() { fmt.Println("Hello world") } - `), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "^$", IsRegExp: true}}, @@ -310,8 +322,10 @@ main.go:4:4: main.go:8:8: +// No newline at end of chunk milton.png:1:1: +// No 
newline at end of chunk `), }, { arg: protocol.PatternInfo{ @@ -324,6 +338,7 @@ milton.png:1:1: }, want: autogold.Expect(`file++.plus:1:1: filename contains regex metachars +// No newline at end of chunk `), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "World", IsNegated: true}}, @@ -360,10 +375,10 @@ symlink `), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "abc"}, PatternMatchesPath: true, PatternMatchesContent: true}, - want: autogold.Expect("abc.txt\nsymlink:1:1:\nabc.txt\n"), + want: autogold.Expect("abc.txt\nsymlink:1:1:\nabc.txt\n// No newline at end of chunk\n"), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "abc"}, PatternMatchesPath: false, PatternMatchesContent: true}, - want: autogold.Expect("symlink:1:1:\nabc.txt\n"), + want: autogold.Expect("symlink:1:1:\nabc.txt\n// No newline at end of chunk\n"), }, { arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "abc"}, PatternMatchesPath: true, PatternMatchesContent: false}, want: autogold.Expect("abc.txt\n"), @@ -371,6 +386,7 @@ symlink arg: protocol.PatternInfo{Query: &protocol.PatternNode{Value: "utf8"}, PatternMatchesPath: false, PatternMatchesContent: true}, want: autogold.Expect(`nonutf8.txt:1:1: file contains invalid utf8 � characters +// No newline at end of chunk `), }} @@ -740,13 +756,17 @@ func toString(m []protocol.FileMatch) string { for _, cm := range f.ChunkMatches { buf.WriteString(f.Path) buf.WriteByte(':') - buf.WriteString(strconv.Itoa(int(cm.ContentStart.Line) + 1)) + firstLine := int(cm.ContentStart.Line) + 1 + lastLine := firstLine + strings.Count(strings.TrimSuffix(cm.Content, "\n"), "\n") + buf.WriteString(strconv.Itoa(firstLine)) buf.WriteByte(':') - buf.WriteString(strconv.Itoa(int(cm.ContentStart.Line) + strings.Count(cm.Content, "\n") + 1)) + buf.WriteString(strconv.Itoa(lastLine)) buf.WriteByte(':') buf.WriteByte('\n') buf.WriteString(cm.Content) - buf.WriteByte('\n') + if !strings.HasSuffix(cm.Content, 
"\n") { + buf.WriteString("\n// No newline at end of chunk\n") + } } } return buf.String() diff --git a/deps.bzl b/deps.bzl index 7eb5cdc669ee..82393c1a0089 100644 --- a/deps.bzl +++ b/deps.bzl @@ -5480,8 +5480,8 @@ def go_dependencies(): patches = [ "//third_party/com_github_sourcegraph_zoekt:x_defs_version.patch", ], - sum = "h1:3DJmyiTtoczytYdvoBqwawkSRZEGZeZB9v0DjfQ6irY=", - version = "v0.0.0-20240417165306-43b92256ba71", + sum = "h1:ZoA5u9P6wjoUFwfgi+alUVLJ60dc0XkshTjzDNoQpGg=", + version = "v0.0.0-20240418025752-74e75efaded6", ) go_repository( name = "com_github_spaolacci_murmur3", diff --git a/go.mod b/go.mod index bcaa474938c2..5c73f955169d 100644 --- a/go.mod +++ b/go.mod @@ -587,7 +587,7 @@ require ( github.com/scim2/filter-parser/v2 v2.2.0 github.com/sourcegraph/conc v0.3.1-0.20240108182409-4afefce20f9b github.com/sourcegraph/mountinfo v0.0.0-20240201124957-b314c0befab1 - github.com/sourcegraph/zoekt v0.0.0-20240417165306-43b92256ba71 + github.com/sourcegraph/zoekt v0.0.0-20240418025752-74e75efaded6 github.com/spf13/cobra v1.8.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.5.2 // indirect diff --git a/go.sum b/go.sum index 64d1b0735d4c..901c7cd8533a 100644 --- a/go.sum +++ b/go.sum @@ -1703,8 +1703,8 @@ github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240409140445-b228ef9 github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240409140445-b228ef93f415/go.mod h1:BQ+bIwhTWmR6VFtLsCs9Ui4o7HJLAtVnjVdWCQRHXms= github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152 h1:z/MpntplPaW6QW95pzcAR/72Z5TWDyDnSo0EOcyij9o= github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I= -github.com/sourcegraph/zoekt v0.0.0-20240417165306-43b92256ba71 h1:3DJmyiTtoczytYdvoBqwawkSRZEGZeZB9v0DjfQ6irY= -github.com/sourcegraph/zoekt v0.0.0-20240417165306-43b92256ba71/go.mod h1:+j+huwz4ZnffJmDHeLJyI9AY4a8DKQnfNV0J//upnyo= +github.com/sourcegraph/zoekt 
v0.0.0-20240418025752-74e75efaded6 h1:ZoA5u9P6wjoUFwfgi+alUVLJ60dc0XkshTjzDNoQpGg= +github.com/sourcegraph/zoekt v0.0.0-20240418025752-74e75efaded6/go.mod h1:+j+huwz4ZnffJmDHeLJyI9AY4a8DKQnfNV0J//upnyo= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v0.0.0-20170901052352-ee1bd8ee15a1/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= diff --git a/internal/search/result/file.go b/internal/search/result/file.go index 84681c35365a..3e1cdf1bc875 100644 --- a/internal/search/result/file.go +++ b/internal/search/result/file.go @@ -279,7 +279,7 @@ func (cm ChunkMatch) MatchedContent() []string { // between lines in a multiline match, but it allows us to keep providing the // LineMatch representation for clients without breaking backwards compatibility. func (h ChunkMatch) AsLineMatches() []*LineMatch { - lines := strings.Split(h.Content, "\n") + lines := strings.Split(strings.TrimSuffix(h.Content, "\n"), "\n") lineMatches := make([]*LineMatch, 0, len(lines)) for i, line := range lines { lineNumber := h.ContentStart.Line + i diff --git a/internal/search/result/file_test.go b/internal/search/result/file_test.go index 4f748f301dc8..cf76aa4d13e8 100644 --- a/internal/search/result/file_test.go +++ b/internal/search/result/file_test.go @@ -114,7 +114,7 @@ func TestConvertMatches(t *testing.T) { output []*LineMatch }{{ input: ChunkMatch{ - Content: "line1\nline2\nline3", + Content: "line1\nline2\nline3\n", ContentStart: Location{Line: 1}, Ranges: Ranges{{ Start: Location{1, 1, 1}, @@ -187,7 +187,7 @@ func TestConvertMatches(t *testing.T) { }}, }, { input: ChunkMatch{ - Content: "line1\nline2", + Content: "line1\nline2\n", ContentStart: Location{Line: 1}, Ranges: Ranges{{ Start: Location{0, 1, 0},