Rename UseKeywordScoring to mention BM25

sourcegraph · May 7, 2024 · 9926a33
1 parent 9f35cb1
commit 9926a33
Show file tree

Hide file tree

Showing 8 changed files with 28 additions and 28 deletions.
diff --git a/api.go b/api.go
@@ -946,10 +946,10 @@ type SearchOptions struct {
 	// will be used. This option is temporary and is only exposed for testing/ tuning purposes.
 	DocumentRanksWeight float64
 
-	// EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
-	// Currently, this treats each match in a file as a term and computes an approximation to BM25.
-	// When enabled, all other scoring signals are ignored, including document ranks.
-	UseKeywordScoring bool
+	// EXPERIMENTAL. If true, use text-search style scoring instead of the default scoring formula.
+	// The scoring algorithm treats each match in a file as a term and computes an approximation to
+	// BM25. When enabled, all other scoring signals are ignored, including document ranks.
+	UseBM25Scoring bool
 
 	// Trace turns on opentracing for this request if true and if the Jaeger address was provided as
 	// a command-line flag
@@ -1015,7 +1015,7 @@ func (s *SearchOptions) String() string {
 	addBool("Whole", s.Whole)
 	addBool("ChunkMatches", s.ChunkMatches)
 	addBool("UseDocumentRanks", s.UseDocumentRanks)
-	addBool("UseKeywordScoring", s.UseKeywordScoring)
+	addBool("UseBM25Scoring", s.UseBM25Scoring)
 	addBool("Trace", s.Trace)
 	addBool("DebugScore", s.DebugScore)
 

diff --git a/api_proto.go b/api_proto.go
@@ -700,7 +700,7 @@ func SearchOptionsFromProto(p *proto.SearchOptions) *SearchOptions {
 		DocumentRanksWeight:    p.GetDocumentRanksWeight(),
 		Trace:                  p.GetTrace(),
 		DebugScore:             p.GetDebugScore(),
-		UseKeywordScoring:      p.GetUseKeywordScoring(),
+		UseBM25Scoring:         p.GetUseBM25Scoring(),
 	}
 }
 
@@ -725,6 +725,6 @@ func (s *SearchOptions) ToProto() *proto.SearchOptions {
 		DocumentRanksWeight:    s.DocumentRanksWeight,
 		Trace:                  s.Trace,
 		DebugScore:             s.DebugScore,
-		UseKeywordScoring:      s.UseKeywordScoring,
+		UseBM25Scoring:         s.UseBM25Scoring,
 	}
 }
diff --git a/build/scoring_test.go b/build/scoring_test.go
@@ -77,7 +77,7 @@ func TestBM25(t *testing.T) {
 			query:    &query.Substring{Pattern: "example"},
 			content:  exampleJava,
 			language: "Java",
-			// keyword-score:1.69 (sum-tf: 7.00, length-ratio: 2.00)
+			// bm25-score:1.69 (sum-tf: 7.00, length-ratio: 2.00)
 			wantScore: 1.69,
 		}, {
 			// Matches only on content
@@ -89,7 +89,7 @@ func TestBM25(t *testing.T) {
 			}},
 			content:  exampleJava,
 			language: "Java",
-			// keyword-score:5.75 (sum-tf: 56.00, length-ratio: 2.00)
+			// bm25-score:5.75 (sum-tf: 56.00, length-ratio: 2.00)
 			wantScore: 5.75,
 		},
 		{
@@ -98,15 +98,15 @@ func TestBM25(t *testing.T) {
 			query:    &query.Substring{Pattern: "java"},
 			content:  exampleJava,
 			language: "Java",
-			// keyword-score:1.07 (sum-tf: 2.00, length-ratio: 2.00)
+			// bm25-score:1.07 (sum-tf: 2.00, length-ratio: 2.00)
 			wantScore: 1.07,
 		},
 		{
 			// Matches only on filename, and content is missing
 			fileName: "a/b/c/config.go",
 			query:    &query.Substring{Pattern: "config.go"},
 			language: "Go",
-			// keyword-score:1.91 (sum-tf: 2.00, length-ratio: 0.00)
+			// bm25-score:1.91 (sum-tf: 2.00, length-ratio: 0.00)
 			wantScore: 1.91,
 		},
 	}
@@ -584,7 +584,7 @@ func skipIfCTagsUnavailable(t *testing.T, parserType ctags.CTagsParserType) {
 	}
 }
 
-func checkScoring(t *testing.T, c scoreCase, keywordScoring bool, parserType ctags.CTagsParserType) {
+func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTagsParserType) {
 	skipIfCTagsUnavailable(t, parserType)
 
 	name := c.language
@@ -625,9 +625,9 @@ func checkScoring(t *testing.T, c scoreCase, keywordScoring bool, parserType cta
 		defer ss.Close()
 
 		srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{
-			UseKeywordScoring: keywordScoring,
-			ChunkMatches:      true,
-			DebugScore:        true})
+			UseBM25Scoring: useBM25,
+			ChunkMatches:   true,
+			DebugScore:     true})
 		if err != nil {
 			t.Fatal(err)
 		}

diff --git a/eval.go b/eval.go
@@ -317,7 +317,7 @@ nextFileMatch:
 			fileMatch.LineMatches = cp.fillMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore)
 		}
 
-		if opts.UseKeywordScoring {
+		if opts.UseBM25Scoring {
 			d.scoreFileUsingBM25(&fileMatch, nextDoc, finalCands, opts)
 		} else {
 			// Use the standard, non-experimental scoring method by default

diff --git a/grpc/protos/zoekt/webserver/v1/webserver.pb.go b/grpc/protos/zoekt/webserver/v1/webserver.pb.go
diff --git a/grpc/protos/zoekt/webserver/v1/webserver.proto b/grpc/protos/zoekt/webserver/v1/webserver.proto
@@ -107,10 +107,10 @@ message SearchOptions {
   // If set, the search results will contain debug information for scoring.
   bool debug_score = 14;
 
-  // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
+  // EXPERIMENTAL. If true, use text-search style scoring instead of the default scoring formula.
   // Currently, this treats each match in a file as a term and computes an approximation to BM25.
   // When enabled, all other scoring signals are ignored, including document ranks.
-  bool use_keyword_scoring = 15;
+  bool use_bm25_scoring = 15;
 }
 
 message ListRequest {

diff --git a/score.go b/score.go
@@ -39,9 +39,9 @@ func (m *FileMatch) addScore(what string, computed float64, raw float64, debugSc
 	m.Score += computed
 }
 
-func (m *FileMatch) addKeywordScore(score float64, sumTf float64, L float64, debugScore bool) {
+func (m *FileMatch) addBM25Score(score float64, sumTf float64, L float64, debugScore bool) {
 	if debugScore {
-		m.Debug += fmt.Sprintf("keyword-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L)
+		m.Debug += fmt.Sprintf("bm25-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L)
 	}
 	m.Score += score
 }
@@ -116,7 +116,7 @@ func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, kn
 }
 
 // scoreFileUsingBM25 computes a score for the file match using an approximation to BM25, the most common scoring
-// algorithm for keyword search: https://en.wikipedia.org/wiki/Okapi_BM25. It implements all parts of the formula
+// algorithm for text search: https://en.wikipedia.org/wiki/Okapi_BM25. It implements all parts of the formula
 // except inverse document frequency (idf), since we don't have access to global term frequency statistics.
 //
 // Filename matches count twice as much as content matches. This mimics a common text search strategy where you
@@ -160,5 +160,5 @@ func (d *indexData) scoreFileUsingBM25(fileMatch *FileMatch, doc uint32, cands [
 		score += ((k + 1.0) * tf) / (k*(1.0-b+b*L) + tf)
 	}
 
-	fileMatch.addKeywordScore(score, sumTf, L, opts.DebugScore)
+	fileMatch.addBM25Score(score, sumTf, L, opts.DebugScore)
 }
diff --git a/shards/shards_test.go b/shards/shards_test.go
@@ -1087,7 +1087,7 @@ func TestAtomCountScore(t *testing.T) {
 	}
 }
 
-func TestUseKeywordScoring(t *testing.T) {
+func TestUseBM25Scoring(t *testing.T) {
 	b := testIndexBuilder(t,
 		&zoekt.Repository{},
 		zoekt.Document{Name: "f1", Content: []byte("one two two three")},
@@ -1103,7 +1103,7 @@ func TestUseKeywordScoring(t *testing.T) {
 		&query.Substring{Pattern: "three"})
 
 	opts := zoekt.SearchOptions{
-		UseKeywordScoring: true,
+		UseBM25Scoring: true,
 	}
 
 	results, err := ss.Search(context.Background(), q, &opts)