-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
archive: e2e test for ranking against sourcegraph repo
This is an initial framework for having golden file results for search results against a real repository. At first we have only added one query and one repository, but it should be straightforward to grow this list further. The golden files we write to disk are a summary of results with debug information. This matches how we have been using the zoekt CLI tool on the keyword branch during our ranking work. Test Plan: go test
- Loading branch information
1 parent
137eb8f
commit 9dad685
Showing
2 changed files
with
261 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
package main | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
"flag" | ||
"fmt" | ||
"io" | ||
"net/url" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/google/go-cmp/cmp" | ||
"github.com/sourcegraph/zoekt" | ||
"github.com/sourcegraph/zoekt/build" | ||
"github.com/sourcegraph/zoekt/query" | ||
"github.com/sourcegraph/zoekt/shards" | ||
) | ||
|
||
// update is the -update test flag. When set, TestRanking overwrites the golden
// files under testdata with the current output before comparing against them.
var update = flag.Bool("update", false, "update golden file")
|
||
func TestRanking(t *testing.T) { | ||
requireCTags(t) | ||
|
||
archiveURLs := []string{ | ||
"https://github.com/sourcegraph/sourcegraph/tree/v5.2.2", | ||
} | ||
queries := []string{ | ||
"graphql type User", | ||
} | ||
|
||
indexDir := t.TempDir() | ||
|
||
for _, u := range archiveURLs { | ||
if err := indexURL(indexDir, u); err != nil { | ||
t.Fatal(err) | ||
} | ||
} | ||
|
||
ss, err := shards.NewDirectorySearcher(indexDir) | ||
if err != nil { | ||
t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err) | ||
} | ||
defer ss.Close() | ||
|
||
for _, queryStr := range queries { | ||
// normalise queryStr for writing to fs | ||
name := strings.Map(func(r rune) rune { | ||
if strings.ContainsRune(" :", r) { | ||
return '_' | ||
} | ||
if '0' <= r && r <= '9' || | ||
'a' <= r && r <= 'z' || | ||
'A' <= r && r <= 'Z' { | ||
return r | ||
} | ||
return -1 | ||
}, queryStr) | ||
|
||
t.Run(name, func(t *testing.T) { | ||
q, err := query.Parse(queryStr) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
sOpts := zoekt.SearchOptions{ | ||
DebugScore: true, | ||
} | ||
result, err := ss.Search(context.Background(), q, &sOpts) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
var gotBuf bytes.Buffer | ||
marshalMatches(&gotBuf, queryStr, q, result.Files) | ||
got := gotBuf.Bytes() | ||
|
||
wantPath := filepath.Join("testdata", name+".txt") | ||
if *update { | ||
if err := os.WriteFile(wantPath, got, 0600); err != nil { | ||
t.Fatal(err) | ||
} | ||
} | ||
want, err := os.ReadFile(wantPath) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
if d := cmp.Diff(string(want), string(got)); d != "" { | ||
t.Fatalf("unexpected (-want, +got):\n%s", d) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
// tarballCache is the directory where downloaded archives are kept so they
// are not fetched again on every run. The directory name is suffixed with the
// value of the USER environment variable.
var tarballCache = fmt.Sprint("/tmp/zoekt-test-ranking-tarballs-", os.Getenv("USER"))
|
||
func indexURL(indexDir, u string) error { | ||
if err := os.MkdirAll(tarballCache, 0700); err != nil { | ||
return err | ||
} | ||
|
||
opts := Options{ | ||
Archive: u, | ||
} | ||
opts.SetDefaults() // sets metadata like Name and the codeload URL | ||
u = opts.Archive | ||
|
||
// update Archive location to cached location | ||
cacheBase := fmt.Sprintf("%s-%s%s.tar.gz", url.QueryEscape(opts.Name), opts.Branch, opts.Commit) // assume .tar.gz | ||
path := filepath.Join(tarballCache, cacheBase) | ||
opts.Archive = path | ||
|
||
if _, err := os.Stat(path); os.IsNotExist(err) { | ||
if err := download(u, path); err != nil { | ||
return err | ||
} | ||
} | ||
|
||
// TODO scip | ||
// languageMap := make(ctags.LanguageMap) | ||
// for _, lang := range []string{"kotlin", "rust", "ruby", "go", "python", "javascript", "c_sharp", "scala", "typescript", "zig"} { | ||
// languageMap[lang] = ctags.ScipCTags | ||
// } | ||
|
||
err := do(opts, build.Options{ | ||
IndexDir: indexDir, | ||
CTagsMustSucceed: true, | ||
}) | ||
if err != nil { | ||
return fmt.Errorf("failed to index %s: %w", opts.Archive, err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func download(url, dst string) error { | ||
tmpPath := dst + ".part" | ||
|
||
rc, err := openReader(url) | ||
if err != nil { | ||
return err | ||
} | ||
defer rc.Close() | ||
|
||
f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600) | ||
if err != nil { | ||
return err | ||
} | ||
defer f.Close() | ||
|
||
_, err = io.Copy(f, rc) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
err = f.Close() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return os.Rename(tmpPath, dst) | ||
} | ||
|
||
// Limits on how much of a search result marshalMatches writes out; anything
// beyond them is summarised as a "hidden N more ..." line.
const (
	// fileMatchesPerSearch is the number of file matches written per search.
	fileMatchesPerSearch = 6
	// lineMatchesPerFile is the number of line matches written per file.
	lineMatchesPerFile = 3
)
|
||
func marshalMatches(w io.Writer, queryStr string, q query.Q, files []zoekt.FileMatch) { | ||
_, _ = fmt.Fprintf(w, "queryString: %s\n", queryStr) | ||
_, _ = fmt.Fprintf(w, "query: %s\n\n", q) | ||
|
||
files, hiddenFiles := splitAtIndex(files, fileMatchesPerSearch) | ||
for _, f := range files { | ||
_, _ = fmt.Fprintf(w, "%s/%s\t%s\n", f.Repository, f.FileName, f.Debug) | ||
|
||
lines, hidden := splitAtIndex(f.LineMatches, lineMatchesPerFile) | ||
|
||
for _, m := range lines { | ||
_, _ = fmt.Fprintf(w, "%d:%s\t%s\n", m.LineNumber, m.Line, m.DebugScore) | ||
} | ||
|
||
if len(hidden) > 0 { | ||
_, _ = fmt.Fprintf(w, "hidden %d more line matches\n", len(hidden)) | ||
} | ||
_, _ = fmt.Fprintln(w) | ||
} | ||
|
||
if len(hiddenFiles) > 0 { | ||
fmt.Fprintf(w, "hidden %d more file matches\n", len(hiddenFiles)) | ||
} | ||
} | ||
|
||
// splitAtIndex splits s into its first idx elements and the remainder.
//
// If idx is at or beyond len(s), s is returned unchanged with a nil remainder.
// A negative idx is treated as 0 rather than panicking. The head is returned
// with its capacity capped at idx, so appending to it allocates a new array
// instead of overwriting the elements of the remainder.
func splitAtIndex[E any](s []E, idx int) ([]E, []E) {
	if idx < 0 {
		idx = 0
	}
	if idx >= len(s) {
		return s, nil
	}
	return s[:idx:idx], s[idx:]
}
|
||
// requireCTags makes sure ctags is available before a test that needs it.
//
// It returns normally when the CTAGS_COMMAND environment variable is set or a
// universal-ctags binary is found on PATH. Otherwise it fails the test when
// the CI environment variable is set, and skips the test when it is not.
func requireCTags(tb testing.TB) {
	tb.Helper()

	const missing = "universal-ctags is missing"

	if cmd := os.Getenv("CTAGS_COMMAND"); cmd != "" {
		return
	}
	if _, err := exec.LookPath("universal-ctags"); err == nil {
		return
	}

	// On CI we require ctags to be available. Otherwise we skip.
	report := tb.Skip
	if os.Getenv("CI") != "" {
		report = tb.Fatal
	}
	report(missing)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
queryString: graphql type User | ||
query: (and substr:"graphql" substr:"type" case_substr:"User") | ||
|
||
github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/schema.graphql score:8807.21 <- atom(4):300.00, fragment:8500.00, doc-order:7.21 | ||
6376:type User implements Node & SettingsSubject & Namespace { score:8500.00 <- WordMatch:500.00, Symbol:7000.00, kind:GraphQL:type:1000.00 | ||
3862: type: GitRefType score:8050.00 <- WordMatch:500.00, Symbol:7000.00, kind:GraphQL:field:550.00 | ||
5037: type: GitRefType! score:8050.00 <- WordMatch:500.00, Symbol:7000.00, kind:GraphQL:field:550.00 | ||
hidden 460 more line matches | ||
|
||
github.com/sourcegraph/sourcegraph/internal/types/types.go score:8759.73 <- atom(4):300.00, fragment:8450.00, doc-order:9.73 | ||
850:type User struct { score:8450.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:struct:950.00 | ||
1372: Type *SearchCountStatistics score:8250.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:member:750.00 | ||
1766: Type string score:8250.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:member:750.00 | ||
hidden 234 more line matches | ||
|
||
github.com/sourcegraph/sourcegraph/client/web/src/enterprise/insights/core/backend/gql-backend/methods/get-dashboard-owners.ts score:8269.38 <- atom(3):266.67, fragment:8000.00, doc-order:2.71 | ||
22: type: InsightsDashboardOwnerType.Global, score:8000.00 <- WordMatch:500.00, Symbol:7000.00, kind:TypeScript:constant:500.00 | ||
32: type: InsightsDashboardOwnerType.Personal, score:8000.00 <- WordMatch:500.00, Symbol:7000.00, kind:TypeScript:constant:500.00 | ||
18: const { currentUser, site } = data score:6500.00 <- WordMatch:500.00, EdgeSymbol:5500.00, kind:TypeScript:constant:500.00 | ||
hidden 8 more line matches | ||
|
||
github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/apitest/types.go score:8751.64 <- atom(4):300.00, fragment:8450.00, doc-order:1.64 | ||
47:type User struct { score:8450.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:struct:950.00 | ||
9: Typename string `json:"__typename"` score:6300.00 <- PartialWordMatch:50.00, EdgeSymbol:5500.00, kind:Go:member:750.00 | ||
32: Typename string `json:"__typename"` score:6300.00 <- PartialWordMatch:50.00, EdgeSymbol:5500.00, kind:Go:member:750.00 | ||
hidden 11 more line matches | ||
|
||
github.com/sourcegraph/sourcegraph/cmd/frontend/internal/batches/resolvers/apitest/types.go score:8751.15 <- atom(4):300.00, fragment:8450.00, doc-order:1.15 | ||
52:type User struct { score:8450.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:struct:950.00 | ||
364: User *User score:8250.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:member:750.00 | ||
393: Type string score:8250.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:member:750.00 | ||
hidden 68 more line matches | ||
|
||
github.com/sourcegraph/sourcegraph/internal/extsvc/github/common.go score:8725.50 <- atom(3):266.67, fragment:8450.00, doc-order:8.84 | ||
2030:type User struct { score:8450.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:struct:950.00 | ||
66: User *Actor `json:"User,omitempty"` score:8250.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:member:750.00 | ||
527: Type string score:8250.00 <- WordMatch:500.00, Symbol:7000.00, kind:Go:member:750.00 | ||
hidden 136 more line matches | ||
|
||
hidden 743 more file matches |