diff --git a/build/builder.go b/build/builder.go index dfbfcf7fa..4f8824e1f 100644 --- a/build/builder.go +++ b/build/builder.go @@ -37,7 +37,7 @@ import ( "time" "github.com/bmatcuk/doublestar" - "github.com/grafana/regexp" + "github.com/go-enry/go-enry/v2" "github.com/rs/xid" "github.com/sourcegraph/zoekt" @@ -901,18 +901,8 @@ func squashRange(j int) float64 { // // These 'priority' criteria affects how documents are ordered within a shard. It's // also used to help guess a file's rank when we're missing ranking information. -func IsLowPriority(file string) bool { - return testRe.MatchString(file) || isGenerated(file) || isVendored(file) -} - -var testRe = regexp.MustCompile("[Tt]est") - -func isGenerated(file string) bool { - return strings.HasSuffix(file, "min.js") || strings.HasSuffix(file, "js.map") -} - -func isVendored(file string) bool { - return strings.Contains(file, "vendor/") || strings.Contains(file, "node_modules/") +func IsLowPriority(path string, content []byte) bool { + return enry.IsTest(path) || enry.IsVendor(path) || enry.IsGenerated(path, content) } type rankedDoc struct { @@ -931,17 +921,17 @@ func rank(d *zoekt.Document, origIdx int) []float64 { } generated := 0.0 - if isGenerated(d.Name) { + if enry.IsGenerated(d.Name, d.Content) { generated = 1.0 } vendor := 0.0 - if isVendored(d.Name) { + if enry.IsVendor(d.Name) { vendor = 1.0 } test := 0.0 - if testRe.MatchString(d.Name) { + if enry.IsTest(d.Name) { test = 1.0 } diff --git a/build/builder_test.go b/build/builder_test.go index e5f4742df..aec77a4e3 100644 --- a/build/builder_test.go +++ b/build/builder_test.go @@ -764,8 +764,7 @@ func TestFindRepositoryMetadata(t *testing.T) { func TestIsLowPriority(t *testing.T) { cases := []string{ "builder_test.go", - "TestQuery.java", - "test/mocks.go", + "test/TestQuery.java", "search/vendor/thirdparty.cc", "search/node_modules/search/index.js", "search.min.js", @@ -774,7 +773,7 @@ func TestIsLowPriority(t *testing.T) { for _, tt := range cases { t.Run(tt, func(t *testing.T) { - if !IsLowPriority(tt) { + if !IsLowPriority(tt, nil) { t.Errorf("expected file '%s' to be low priority", tt) } }) @@ -788,11 +787,22 @@ func TestIsLowPriority(t *testing.T) { for _, tt := range negativeCases { t.Run(tt, func(t *testing.T) { - if IsLowPriority(tt) { + if IsLowPriority(tt, nil) { t.Errorf("did not expect file '%s' to be low priority", tt) } }) } + + // Explicitly check that content is important by using the same filename but + // different content. + normal := "package mock\n\nvar Mock struct {}" + generated := "// Code generated by mock\npackage mock\n\nvar Mock struct {}" + if IsLowPriority("mock.go", []byte(normal)) { + t.Error("expected non-generated content to not be low priority") + } + if !IsLowPriority("mock.go", []byte(generated)) { + t.Error("expected generated content to be low priority") + } } func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string { diff --git a/build/e2e_test.go b/build/e2e_test.go index f66b64b9e..1eb5acff4 100644 --- a/build/e2e_test.go +++ b/build/e2e_test.go @@ -502,7 +502,7 @@ func TestFileRank(t *testing.T) { name: "test", docs: []*zoekt.Document{ { - Name: "test", + Name: "foo_test.go", Content: []byte("bla"), }, { diff --git a/gitindex/index.go b/gitindex/index.go index 92cf3686c..ddf73e0d6 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -589,10 +589,10 @@ type repoPathRanks struct { // - If we have a concrete rank for this file, always use it // - If there's no rank, and it's a low priority file like a test, then use rank 0 // - Otherwise use the mean rank of this repository, to avoid giving it a big disadvantage -func (r repoPathRanks) rank(path string) float64 { +func (r repoPathRanks) rank(path string, content []byte) float64 { if rank, ok := r.Paths[path]; ok { return rank - } else if build.IsLowPriority(path) { + } else if build.IsLowPriority(path, content) { return 0.0 } else { return r.MeanRank @@ -910,7 +910,7 @@ func createDocument(key fileKey, var pathRanks []float64 if len(ranks.Paths) > 0 { // If the repository has ranking data, then store the file's rank. - pathRank := ranks.rank(keyFullPath) + pathRank := ranks.rank(keyFullPath, contents) pathRanks = []float64{pathRank} } diff --git a/gitindex/index_test.go b/gitindex/index_test.go index 63f5899a7..0aca04145 100644 --- a/gitindex/index_test.go +++ b/gitindex/index_test.go @@ -744,7 +744,7 @@ func TestRepoPathRanks(t *testing.T) { for _, tt := range cases { t.Run(tt.name, func(t *testing.T) { - got := pathRanks.rank(tt.path) + got := pathRanks.rank(tt.path, nil) if got != tt.rank { t.Errorf("expected file '%s' to have rank %f, but got %f", tt.path, tt.rank, got) } diff --git a/internal/e2e/testdata/coverage_data_writer.txt b/internal/e2e/testdata/coverage_data_writer.txt index 06abe41e5..ce4b3b8dd 100644 --- a/internal/e2e/testdata/coverage_data_writer.txt +++ b/internal/e2e/testdata/coverage_data_writer.txt @@ -14,6 +14,12 @@ github.com/golang/go/src/cmd/cover/func.go 32:// as output the coverage data broken down by function, like this: hidden 8 more line matches +github.com/golang/go/src/testing/fuzz.go +93: Data []byte +205:// modify the underlying data of the arguments provided by the fuzzing engine. +275: run := func(captureOut io.Writer, e corpusEntry) (ok bool) { +hidden 7 more line matches + github.com/golang/go/src/cmd/cover/html.go 199: Coverage float64 170:type templateData struct { @@ -26,12 +32,6 @@ github.com/golang/go/src/internal/fuzz/fuzz.go 908:func (c *coordinator) updateCoverage(newCoverage []byte) int { hidden 91 more line matches -github.com/golang/go/src/testing/fuzz.go -93: Data []byte -205:// modify the underlying data of the arguments provided by the fuzzing engine. -275: run := func(captureOut io.Writer, e corpusEntry) (ok bool) { -hidden 7 more line matches - github.com/golang/go/src/cmd/vendor/golang.org/x/sys/unix/ztypes_linux.go 227: Data [7]byte 449: Data [8]uint32 diff --git a/internal/e2e/testdata/test_server.txt b/internal/e2e/testdata/test_server.txt index e569d8553..8cfce010c 100644 --- a/internal/e2e/testdata/test_server.txt +++ b/internal/e2e/testdata/test_server.txt @@ -32,10 +32,10 @@ github.com/sourcegraph/sourcegraph-public-snapshot/cmd/gitserver/server/server.g 741:func (s *Server) serverContext() (context.Context, context.CancelFunc) { hidden 166 more line matches -github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/main.go -150:type Server struct { -1232:func startServer(conf rootConfig) error { -1309:func newServer(conf rootConfig) (*Server, error) { -hidden 52 more line matches +github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/testing.go +46:type Test struct { +79:func RunTest(t *testing.T, test *Test) { +58:func RunTests(t *testing.T, tests []*Test) { +hidden 27 more line matches hidden 494 more file matches diff --git a/internal/e2e/testdata/time_compare.txt b/internal/e2e/testdata/time_compare.txt index 32da48b4f..b6b732d56 100644 --- a/internal/e2e/testdata/time_compare.txt +++ b/internal/e2e/testdata/time_compare.txt @@ -29,10 +29,10 @@ github.com/golang/go/src/go/constant/value.go 1381: re := Compare(x.re, token.EQL, y.re) hidden 1 more line matches -github.com/golang/go/src/syscall/zsyscall_windows.go -878:func GetSystemTimeAsFileTime(time *Filetime) { -1088:func SetFileTime(handle Handle, ctime *Filetime, atime *Filetime, wtime *Filetime) (err error) { -132: procGetSystemTimeAsFileTime = modkernel32.NewProc("GetSystemTimeAsFileTime") -hidden 19 more line matches +github.com/golang/go/src/cmd/go/internal/gover/gover.go +36:func Compare(x, y string) int { +20:// but at the time this code was written, there was an existing test that used +49: if c := cmp.Compare(vx.kind, vy.kind); c != 0 { // "" < alpha < beta < rc +hidden 4 more line matches hidden 139 more file matches