From 761bd1f02cfe73009edf1e31554c991234626ae3 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 7 May 2024 10:05:37 -0700 Subject: [PATCH] Rename UseKeywordScoring to mention BM25 --- api.go | 10 +++++----- api_proto.go | 4 ++-- build/scoring_test.go | 6 +++--- eval.go | 4 ++-- score.go | 6 +++--- shards/shards_test.go | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/api.go b/api.go index 6d2497d1..192b6a83 100644 --- a/api.go +++ b/api.go @@ -946,10 +946,10 @@ type SearchOptions struct { // will be used. This option is temporary and is only exposed for testing/ tuning purposes. DocumentRanksWeight float64 - // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. - // Currently, this treats each match in a file as a term and computes an approximation to BM25. - // When enabled, all other scoring signals are ignored, including document ranks. - UseKeywordScoring bool + // EXPERIMENTAL. If true, use text-search style scoring instead of the default scoring formula. + // The scoring algorithm treats each match in a file as a term and computes an approximation to + // BM25. When enabled, all other scoring signals are ignored, including document ranks. + UseBM25Scoring bool // Trace turns on opentracing for this request if true and if the Jaeger address was provided as // a command-line flag @@ -1015,7 +1015,7 @@ func (s *SearchOptions) String() string { addBool("Whole", s.Whole) addBool("ChunkMatches", s.ChunkMatches) addBool("UseDocumentRanks", s.UseDocumentRanks) - addBool("UseKeywordScoring", s.UseKeywordScoring) + addBool("UseBM25Scoring", s.UseBM25Scoring) addBool("Trace", s.Trace) addBool("DebugScore", s.DebugScore) diff --git a/api_proto.go b/api_proto.go index 7e17b8ca..ccfccb0f 100644 --- a/api_proto.go +++ b/api_proto.go @@ -700,7 +700,7 @@ func SearchOptionsFromProto(p *proto.SearchOptions) *SearchOptions { DocumentRanksWeight: p.GetDocumentRanksWeight(), Trace: p.GetTrace(), DebugScore: p.GetDebugScore(), - UseKeywordScoring: p.GetUseKeywordScoring(), + UseBM25Scoring: p.GetUseKeywordScoring(), } } @@ -725,6 +725,6 @@ func (s *SearchOptions) ToProto() *proto.SearchOptions { DocumentRanksWeight: s.DocumentRanksWeight, Trace: s.Trace, DebugScore: s.DebugScore, - UseKeywordScoring: s.UseKeywordScoring, + UseKeywordScoring: s.UseBM25Scoring, } } diff --git a/build/scoring_test.go b/build/scoring_test.go index 9bf243fc..7aea8e00 100644 --- a/build/scoring_test.go +++ b/build/scoring_test.go @@ -625,9 +625,9 @@ func checkScoring(t *testing.T, c scoreCase, keywordScoring bool, parserType cta defer ss.Close() srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{ - UseKeywordScoring: keywordScoring, - ChunkMatches: true, - DebugScore: true}) + UseBM25Scoring: keywordScoring, + ChunkMatches: true, + DebugScore: true}) if err != nil { t.Fatal(err) } diff --git a/eval.go b/eval.go index cc0e61f2..2b97d054 100644 --- a/eval.go +++ b/eval.go @@ -312,12 +312,12 @@ nextFileMatch: finalCands := d.gatherMatches(nextDoc, mt, known, shouldMergeMatches) if opts.ChunkMatches { - fileMatch.ChunkMatches = cp.fillChunkMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore) + fileMatch.ChunkMatches = cp.fillChunkMatches(finalCands, opts.NumContextLines, opts.UseBM25Scoring, fileMatch.Language, opts.DebugScore) } else { fileMatch.LineMatches = cp.fillMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore) } - if opts.UseKeywordScoring { + if opts.UseBM25Scoring { d.scoreFileUsingBM25(&fileMatch, nextDoc, finalCands, opts) } else { // Use the standard, non-experimental scoring method by default diff --git a/score.go b/score.go index 115eabd2..25beef5b 100644 --- a/score.go +++ b/score.go @@ -39,9 +39,9 @@ func (m *FileMatch) addScore(what string, computed float64, raw float64, debugSc m.Score += computed } -func (m *FileMatch) addKeywordScore(score float64, sumTf float64, L float64, debugScore bool) { +func (m *FileMatch) addBM25Score(score float64, sumTf float64, L float64, debugScore bool) { if debugScore { - m.Debug += fmt.Sprintf("keyword-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L) + m.Debug += fmt.Sprintf("bm25-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L) } m.Score += score } @@ -160,5 +160,5 @@ func (d *indexData) scoreFileUsingBM25(fileMatch *FileMatch, doc uint32, cands [ score += ((k + 1.0) * tf) / (k*(1.0-b+b*L) + tf) } - fileMatch.addKeywordScore(score, sumTf, L, opts.DebugScore) + fileMatch.addBM25Score(score, sumTf, L, opts.DebugScore) } diff --git a/shards/shards_test.go b/shards/shards_test.go index 5c4ddc73..9b7d37b4 100644 --- a/shards/shards_test.go +++ b/shards/shards_test.go @@ -1103,7 +1103,7 @@ func TestUseKeywordScoring(t *testing.T) { &query.Substring{Pattern: "three"}) opts := zoekt.SearchOptions{ - UseKeywordScoring: true, + UseBM25Scoring: true, } results, err := ss.Search(context.Background(), q, &opts)