Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

build: use enry to detect low priority files #829

Merged
merged 3 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 6 additions & 16 deletions build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
"time"

"github.com/bmatcuk/doublestar"
"github.com/grafana/regexp"
"github.com/go-enry/go-enry/v2"
"github.com/rs/xid"

"github.com/sourcegraph/zoekt"
Expand Down Expand Up @@ -901,18 +901,8 @@ func squashRange(j int) float64 {
//
// These 'priority' criteria affects how documents are ordered within a shard. It's
// also used to help guess a file's rank when we're missing ranking information.
func IsLowPriority(file string) bool {
return testRe.MatchString(file) || isGenerated(file) || isVendored(file)
}

var testRe = regexp.MustCompile("[Tt]est")

func isGenerated(file string) bool {
return strings.HasSuffix(file, "min.js") || strings.HasSuffix(file, "js.map")
}

func isVendored(file string) bool {
return strings.Contains(file, "vendor/") || strings.Contains(file, "node_modules/")
func IsLowPriority(path string, content []byte) bool {
return enry.IsTest(path) || enry.IsVendor(path) || enry.IsGenerated(path, content)
}

type rankedDoc struct {
Expand All @@ -931,17 +921,17 @@ func rank(d *zoekt.Document, origIdx int) []float64 {
}

generated := 0.0
if isGenerated(d.Name) {
if enry.IsGenerated(d.Name, d.Content) {
generated = 1.0
}

vendor := 0.0
if isVendored(d.Name) {
if enry.IsVendor(d.Name) {
vendor = 1.0
}

test := 0.0
if testRe.MatchString(d.Name) {
if enry.IsTest(d.Name) {
test = 1.0
}

Expand Down
18 changes: 14 additions & 4 deletions build/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -764,8 +764,7 @@ func TestFindRepositoryMetadata(t *testing.T) {
func TestIsLowPriority(t *testing.T) {
cases := []string{
"builder_test.go",
"TestQuery.java",
"test/mocks.go",
"test/TestQuery.java",
"search/vendor/thirdparty.cc",
"search/node_modules/search/index.js",
"search.min.js",
Expand All @@ -774,7 +773,7 @@ func TestIsLowPriority(t *testing.T) {

for _, tt := range cases {
t.Run(tt, func(t *testing.T) {
if !IsLowPriority(tt) {
if !IsLowPriority(tt, nil) {
t.Errorf("expected file '%s' to be low priority", tt)
}
})
Expand All @@ -788,11 +787,22 @@ func TestIsLowPriority(t *testing.T) {

for _, tt := range negativeCases {
t.Run(tt, func(t *testing.T) {
if IsLowPriority(tt) {
if IsLowPriority(tt, nil) {
keegancsmith marked this conversation as resolved.
Show resolved Hide resolved
t.Errorf("did not expect file '%s' to be low priority", tt)
}
})
}

// Explicitly check that content is important by using the same filename but
// different content.
normal := "package mock\n\nvar Mock struct {}"
generated := "// Code generated by mock\npackage mock\n\nvar Mock struct {}"
if IsLowPriority("mock.go", []byte(normal)) {
t.Error("expected non-generated content to not be low priority")
}
if !IsLowPriority("mock.go", []byte(generated)) {
t.Error("expected generated content to be low priority")
}
}

func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string {
Expand Down
2 changes: 1 addition & 1 deletion build/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ func TestFileRank(t *testing.T) {
name: "test",
docs: []*zoekt.Document{
{
Name: "test",
Name: "foo_test.go",
Content: []byte("bla"),
},
{
Expand Down
6 changes: 3 additions & 3 deletions gitindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -589,10 +589,10 @@ type repoPathRanks struct {
// - If we have a concrete rank for this file, always use it
// - If there's no rank, and it's a low priority file like a test, then use rank 0
// - Otherwise use the mean rank of this repository, to avoid giving it a big disadvantage
func (r repoPathRanks) rank(path string) float64 {
func (r repoPathRanks) rank(path string, content []byte) float64 {
if rank, ok := r.Paths[path]; ok {
return rank
} else if build.IsLowPriority(path) {
} else if build.IsLowPriority(path, content) {
return 0.0
} else {
return r.MeanRank
Expand Down Expand Up @@ -910,7 +910,7 @@ func createDocument(key fileKey,
var pathRanks []float64
if len(ranks.Paths) > 0 {
// If the repository has ranking data, then store the file's rank.
pathRank := ranks.rank(keyFullPath)
pathRank := ranks.rank(keyFullPath, contents)
pathRanks = []float64{pathRank}
}

Expand Down
2 changes: 1 addition & 1 deletion gitindex/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ func TestRepoPathRanks(t *testing.T) {

for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got := pathRanks.rank(tt.path)
got := pathRanks.rank(tt.path, nil)
if got != tt.rank {
t.Errorf("expected file '%s' to have rank %f, but got %f", tt.path, tt.rank, got)
}
Expand Down
12 changes: 6 additions & 6 deletions internal/e2e/testdata/coverage_data_writer.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ github.com/golang/go/src/cmd/cover/func.go
32:// as output the coverage data broken down by function, like this:
hidden 8 more line matches

github.com/golang/go/src/testing/fuzz.go
93: Data []byte
205:// modify the underlying data of the arguments provided by the fuzzing engine.
275: run := func(captureOut io.Writer, e corpusEntry) (ok bool) {
hidden 7 more line matches

github.com/golang/go/src/cmd/cover/html.go
199: Coverage float64
170:type templateData struct {
Expand All @@ -26,12 +32,6 @@ github.com/golang/go/src/internal/fuzz/fuzz.go
908:func (c *coordinator) updateCoverage(newCoverage []byte) int {
hidden 91 more line matches

github.com/golang/go/src/testing/fuzz.go
93: Data []byte
205:// modify the underlying data of the arguments provided by the fuzzing engine.
275: run := func(captureOut io.Writer, e corpusEntry) (ok bool) {
hidden 7 more line matches

github.com/golang/go/src/cmd/vendor/golang.org/x/sys/unix/ztypes_linux.go
227: Data [7]byte
449: Data [8]uint32
Expand Down
10 changes: 5 additions & 5 deletions internal/e2e/testdata/test_server.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ github.com/sourcegraph/sourcegraph-public-snapshot/cmd/gitserver/server/server.g
741:func (s *Server) serverContext() (context.Context, context.CancelFunc) {
hidden 166 more line matches

github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/main.go
150:type Server struct {
1232:func startServer(conf rootConfig) error {
1309:func newServer(conf rootConfig) (*Server, error) {
hidden 52 more line matches
github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/testing.go
46:type Test struct {
79:func RunTest(t *testing.T, test *Test) {
58:func RunTests(t *testing.T, tests []*Test) {
hidden 27 more line matches
Comment on lines +35 to +39
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This result seems worse, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so? The old result has nothing to do with test, and the new result has nothing to do with server. It makes sense that this result ends up higher since it has more exported symbols matching a token?


hidden 494 more file matches
10 changes: 5 additions & 5 deletions internal/e2e/testdata/time_compare.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ github.com/golang/go/src/go/constant/value.go
1381: re := Compare(x.re, token.EQL, y.re)
hidden 1 more line matches

github.com/golang/go/src/syscall/zsyscall_windows.go
878:func GetSystemTimeAsFileTime(time *Filetime) {
1088:func SetFileTime(handle Handle, ctime *Filetime, atime *Filetime, wtime *Filetime) (err error) {
132: procGetSystemTimeAsFileTime = modkernel32.NewProc("GetSystemTimeAsFileTime")
hidden 19 more line matches
github.com/golang/go/src/cmd/go/internal/gover/gover.go
36:func Compare(x, y string) int {
20:// but at the time this code was written, there was an existing test that used
49: if c := cmp.Compare(vx.kind, vy.kind); c != 0 { // "" < alpha < beta < rc
hidden 4 more line matches

hidden 139 more file matches
Loading