diff --git a/build/builder.go b/build/builder.go index dfbfcf7fa..4f8824e1f 100644 --- a/build/builder.go +++ b/build/builder.go @@ -37,7 +37,7 @@ import ( "time" "github.com/bmatcuk/doublestar" - "github.com/grafana/regexp" + "github.com/go-enry/go-enry/v2" "github.com/rs/xid" "github.com/sourcegraph/zoekt" @@ -901,18 +901,8 @@ func squashRange(j int) float64 { // // These 'priority' criteria affects how documents are ordered within a shard. It's // also used to help guess a file's rank when we're missing ranking information. -func IsLowPriority(file string) bool { - return testRe.MatchString(file) || isGenerated(file) || isVendored(file) -} - -var testRe = regexp.MustCompile("[Tt]est") - -func isGenerated(file string) bool { - return strings.HasSuffix(file, "min.js") || strings.HasSuffix(file, "js.map") -} - -func isVendored(file string) bool { - return strings.Contains(file, "vendor/") || strings.Contains(file, "node_modules/") +func IsLowPriority(path string, content []byte) bool { + return enry.IsTest(path) || enry.IsVendor(path) || enry.IsGenerated(path, content) } type rankedDoc struct { @@ -931,17 +921,17 @@ func rank(d *zoekt.Document, origIdx int) []float64 { } generated := 0.0 - if isGenerated(d.Name) { + if enry.IsGenerated(d.Name, d.Content) { generated = 1.0 } vendor := 0.0 - if isVendored(d.Name) { + if enry.IsVendor(d.Name) { vendor = 1.0 } test := 0.0 - if testRe.MatchString(d.Name) { + if enry.IsTest(d.Name) { test = 1.0 } diff --git a/build/builder_test.go b/build/builder_test.go index e5f4742df..ecf5c82f8 100644 --- a/build/builder_test.go +++ b/build/builder_test.go @@ -764,8 +764,7 @@ func TestFindRepositoryMetadata(t *testing.T) { func TestIsLowPriority(t *testing.T) { cases := []string{ "builder_test.go", - "TestQuery.java", - "test/mocks.go", + "test/TestQuery.java", "search/vendor/thirdparty.cc", "search/node_modules/search/index.js", "search.min.js", @@ -774,7 +773,7 @@ func TestIsLowPriority(t *testing.T) { for _, tt := range cases { t.Run(tt, func(t *testing.T) { - if !IsLowPriority(tt) { + if !IsLowPriority(tt, nil) { t.Errorf("expected file '%s' to be low priority", tt) } }) @@ -788,7 +787,7 @@ func TestIsLowPriority(t *testing.T) { for _, tt := range negativeCases { t.Run(tt, func(t *testing.T) { - if IsLowPriority(tt) { + if IsLowPriority(tt, nil) { t.Errorf("did not expect file '%s' to be low priority", tt) } }) diff --git a/build/e2e_test.go b/build/e2e_test.go index f66b64b9e..1eb5acff4 100644 --- a/build/e2e_test.go +++ b/build/e2e_test.go @@ -502,7 +502,7 @@ func TestFileRank(t *testing.T) { name: "test", docs: []*zoekt.Document{ { - Name: "test", + Name: "foo_test.go", Content: []byte("bla"), }, { diff --git a/gitindex/index.go b/gitindex/index.go index 92cf3686c..ddf73e0d6 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -589,10 +589,10 @@ type repoPathRanks struct { // - If we have a concrete rank for this file, always use it // - If there's no rank, and it's a low priority file like a test, then use rank 0 // - Otherwise use the mean rank of this repository, to avoid giving it a big disadvantage -func (r repoPathRanks) rank(path string) float64 { +func (r repoPathRanks) rank(path string, content []byte) float64 { if rank, ok := r.Paths[path]; ok { return rank - } else if build.IsLowPriority(path) { + } else if build.IsLowPriority(path, content) { return 0.0 } else { return r.MeanRank @@ -910,7 +910,7 @@ func createDocument(key fileKey, var pathRanks []float64 if len(ranks.Paths) > 0 { // If the repository has ranking data, then store the file's rank. - pathRank := ranks.rank(keyFullPath) + pathRank := ranks.rank(keyFullPath, contents) pathRanks = []float64{pathRank} } diff --git a/gitindex/index_test.go b/gitindex/index_test.go index 63f5899a7..0aca04145 100644 --- a/gitindex/index_test.go +++ b/gitindex/index_test.go @@ -744,7 +744,7 @@ func TestRepoPathRanks(t *testing.T) { for _, tt := range cases { t.Run(tt.name, func(t *testing.T) { - got := pathRanks.rank(tt.path) + got := pathRanks.rank(tt.path, nil) if got != tt.rank { t.Errorf("expected file '%s' to have rank %f, but got %f", tt.path, tt.rank, got) }