From b35d8a202d1ecf524b5a6eb1c660ba6ce12f691f Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Fri, 12 Jan 2024 15:01:14 +0200 Subject: [PATCH] e2e: add ranking tests for atom boosting (#715) This adds four cases which are exact phrases to search for. "assets are not configured for this binary" finds the correct document, but it isn't shown in the summary. Our target rank doesn't capture that this could be better, but I will use the golden file to see if I improve this. "sourcegraph/server docker image build" is an example of an exact phrase which also happens to contain words of highly ranked symbols. This leads to the exact phrase getting buried. I want to see if I can boost that. "bufio flush writer" should find the symbol bufioFlushWriter. "coverage data writer" should find the symbol CoverageDataWriter. Test Plan: go test --- internal/e2e/e2e_rank_test.go | 8 ++++ ...ets_are_not_configured_for_this_binary.txt | 41 +++++++++++++++++++ internal/e2e/testdata/bufio_flush_writer.txt | 41 +++++++++++++++++++ .../e2e/testdata/coverage_data_writer.txt | 41 +++++++++++++++++++ internal/e2e/testdata/rank_stats.txt | 8 ++-- .../sourcegraphserver_docker_image_build.txt | 41 +++++++++++++++++++ 6 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 internal/e2e/testdata/assets_are_not_configured_for_this_binary.txt create mode 100644 internal/e2e/testdata/bufio_flush_writer.txt create mode 100644 internal/e2e/testdata/coverage_data_writer.txt create mode 100644 internal/e2e/testdata/sourcegraphserver_docker_image_build.txt diff --git a/internal/e2e/e2e_rank_test.go b/internal/e2e/e2e_rank_test.go index 6e0e4b204..f4fbe99f5 100644 --- a/internal/e2e/e2e_rank_test.go +++ b/internal/e2e/e2e_rank_test.go @@ -61,6 +61,14 @@ func TestRanking(t *testing.T) { // cody q("generate unit test", "github.com/sourcegraph/cody/lib/shared/src/chat/recipes/generate-test.ts"), q("r:cody sourcegraph url", "github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/graphql/client.ts"), + + // exact phrases + q("assets are not configured for this binary", "github.com/sourcegraph/sourcegraph/ui/assets/assets.go"), + q("sourcegraph/server docker image build", "github.com/sourcegraph/sourcegraph/dev/tools.go"), + + // symbols split up + q("bufio flush writer", "github.com/golang/go/src/net/http/transfer.go"), // bufioFlushWriter + q("coverage data writer", "github.com/golang/go/src/internal/coverage/encodecounter/encode.go"), // CoverageDataWriter } var indexDir string diff --git a/internal/e2e/testdata/assets_are_not_configured_for_this_binary.txt b/internal/e2e/testdata/assets_are_not_configured_for_this_binary.txt new file mode 100644 index 000000000..a501118e4 --- /dev/null +++ b/internal/e2e/testdata/assets_are_not_configured_for_this_binary.txt @@ -0,0 +1,41 @@ +queryString: assets are not configured for this binary +query: (and substr:"assets" substr:"are" substr:"not" substr:"configured" substr:"for" substr:"this" substr:"binary") +targetRank: 1 + +**github.com/sourcegraph/sourcegraph/ui/assets/assets.go** +33:func (p FailingAssetsProvider) Assets() http.FileSystem { +14: Assets() http.FileSystem +1:package assets +hidden 12 more line matches + +github.com/sourcegraph/sourcegraph/schema/schema.go +492:type BrandAssets struct { +1530:type Notice struct { +1538:type Notifications struct { +hidden 668 more line matches + +github.com/sourcegraph/sourcegraph/doc/admin/executors/deploy_executors.md +118:## Confirm executors are working +194:### Configuring the auth config for use in executors +216:### Adding certificates to a binary deployment +hidden 47 more line matches + +github.com/sourcegraph/sourcegraph/doc/getting-started/github-vs-sourcegraph.md +8:## Which is best for you? +110:### Searching repositories, branches, and forks +18:As your codebase grows in complexity, the value of code search quickly increases. Sourcegraph may be a good fit for your team if: +hidden 66 more line matches + +github.com/sourcegraph/sourcegraph/doc/admin/executors/deploy_executors_terraform.md +1:# Deploying Sourcegraph executors using Terraform on AWS or GCP +56:## Terraform Version +415:### **Step 1:** Update the source version of the terraform modules +hidden 68 more line matches + +github.com/sourcegraph/sourcegraph/doc/dev/background-information/sg/reference.md +496:### sg lint format +505:### sg lint format +1: +hidden 265 more line matches + +hidden 3 more file matches diff --git a/internal/e2e/testdata/bufio_flush_writer.txt b/internal/e2e/testdata/bufio_flush_writer.txt new file mode 100644 index 000000000..5091b9ec4 --- /dev/null +++ b/internal/e2e/testdata/bufio_flush_writer.txt @@ -0,0 +1,41 @@ +queryString: bufio flush writer +query: (and substr:"bufio" substr:"flush" substr:"writer") +targetRank: 25 + +github.com/golang/go/src/image/gif/writer.go +43:type writer interface { +77:func (b blockWriter) Flush() error { +123:func (e *encoder) flush() { +hidden 28 more line matches + +github.com/golang/go/src/image/jpeg/writer.go +211:type writer interface { +231:func (e *encoder) flush() { +212: Flush() error +hidden 11 more line matches + +github.com/golang/go/src/compress/lzw/writer.go +15:type writer interface { +36:type Writer struct { +17: Flush() error +hidden 36 more line matches + +github.com/golang/go/src/bufio/bufio.go +579:type Writer struct { +635:func (b *Writer) Flush() error { +836: *Writer +hidden 72 more line matches + +github.com/golang/go/src/archive/zip/writer.go +24:type Writer struct { +61:func (w *Writer) Flush() error { +607: io.Writer +hidden 55 more line matches + +github.com/golang/go/src/encoding/csv/writer.go +30:type Writer struct { +123:func (w *Writer) Flush() { +37:func NewWriter(w io.Writer) *Writer { +hidden 25 more line matches + +hidden 77 more file matches diff --git a/internal/e2e/testdata/coverage_data_writer.txt b/internal/e2e/testdata/coverage_data_writer.txt new file mode 100644 index 000000000..a4a094f7c --- /dev/null +++ b/internal/e2e/testdata/coverage_data_writer.txt @@ -0,0 +1,41 @@ +queryString: coverage data writer +query: (and substr:"coverage" substr:"data" substr:"writer") +targetRank: 13 + +github.com/golang/go/src/internal/coverage/stringtab/stringtab.go +19:type Writer struct { +27:func (stw *Writer) InitWriter() { +9: "internal/coverage/slicereader" +hidden 16 more line matches + +github.com/golang/go/src/cmd/cover/func.go +149:func (f *FuncExtent) coverage(profile *cover.Profile) (num, den int64) { +30:// funcOutput takes two file names as arguments, a coverage profile to read as input and an output +32:// as output the coverage data broken down by function, like this: +hidden 8 more line matches + +github.com/golang/go/src/cmd/cover/html.go +199: Coverage float64 +170:type templateData struct { +21:// htmlOutput reads the profile data from profile and generates an HTML +hidden 18 more line matches + +github.com/golang/go/src/internal/fuzz/fuzz.go +474: Data []byte +487:func corpusEntryData(ce CorpusEntry) ([]byte, error) { +908:func (c *coordinator) updateCoverage(newCoverage []byte) int { +hidden 91 more line matches + +github.com/golang/go/src/testing/fuzz.go +93: Data []byte +205:// modify the underlying data of the arguments provided by the fuzzing engine. +275: run := func(captureOut io.Writer, e corpusEntry) (ok bool) { +hidden 7 more line matches + +github.com/golang/go/src/cmd/vendor/golang.org/x/sys/unix/ztypes_linux.go +227: Data [7]byte +449: Data [8]uint32 +2384: Data *byte +hidden 85 more line matches + +hidden 35 more file matches diff --git a/internal/e2e/testdata/rank_stats.txt b/internal/e2e/testdata/rank_stats.txt index f2b464109..a7b18c582 100644 --- a/internal/e2e/testdata/rank_stats.txt +++ b/internal/e2e/testdata/rank_stats.txt @@ -1,4 +1,4 @@ -queries: 9 -recall@1: 5 (56%) -recall@5: 7 (78%) -mrr: 0.658249 +queries: 13 +recall@1: 6 (46%) +recall@5: 8 (62%) +mrr: 0.547123 diff --git a/internal/e2e/testdata/sourcegraphserver_docker_image_build.txt b/internal/e2e/testdata/sourcegraphserver_docker_image_build.txt new file mode 100644 index 000000000..b1c431e98 --- /dev/null +++ b/internal/e2e/testdata/sourcegraphserver_docker_image_build.txt @@ -0,0 +1,41 @@ +queryString: sourcegraph/server docker image build +query: (and substr:"sourcegraph/server" substr:"docker" substr:"image" substr:"build") +targetRank: 14 + +github.com/sourcegraph/sourcegraph/dev/sg/internal/images/images.go +458: Build int +234:type ImageReference struct { +352:type ErrNoImage struct { +hidden 118 more line matches + +github.com/sourcegraph/sourcegraph/doc/admin/external_services/postgres.md +41:### sourcegraph/server +192:### sourcegraph/server +53:### Docker Compose +hidden 19 more line matches + +github.com/sourcegraph/sourcegraph/internal/conf/deploy/deploytype.go +66:func IsDeployTypePureDocker(deployType string) bool { +12: SingleDocker = "docker-container" +13: DockerCompose = "docker-compose" +hidden 19 more line matches + +github.com/sourcegraph/sourcegraph/schema/schema.go +2621: ExecutorsBatcheshelperImage string `json:"executors.batcheshelperImage,omitempty"` +2627: ExecutorsLsifGoImage string `json:"executors.lsifGoImage,omitempty"` +2631: ExecutorsSrcCLIImage string `json:"executors.srcCLIImage,omitempty"` +hidden 22 more line matches + +github.com/sourcegraph/sourcegraph/internal/updatecheck/handler.go +40: latestReleaseDockerServerImageBuild = newPingResponse("5.1.8") +45: latestReleaseKubernetesBuild = newPingResponse("5.1.8") +50: latestReleaseDockerComposeOrPureDocker = newPingResponse("5.1.8") +hidden 19 more line matches + +github.com/sourcegraph/sourcegraph/doc/admin/deploy/docker-single-container/index.md +1:# Docker Single Container Deployment +294:### Insiders build +238:### File system performance on Docker for Mac +hidden 52 more line matches + +hidden 15 more file matches