Skip to content

Commit

Permalink
Rename UseKeywordScoring to mention BM25
Browse files Browse the repository at this point in the history
  • Loading branch information
jtibshirani committed May 7, 2024
1 parent 9f35cb1 commit 761bd1f
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 16 deletions.
10 changes: 5 additions & 5 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -946,10 +946,10 @@ type SearchOptions struct {
// will be used. This option is temporary and is only exposed for testing/tuning purposes.
DocumentRanksWeight float64

// EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
// Currently, this treats each match in a file as a term and computes an approximation to BM25.
// When enabled, all other scoring signals are ignored, including document ranks.
UseKeywordScoring bool
// EXPERIMENTAL. If true, use text-search style scoring instead of the default scoring formula.
// The scoring algorithm treats each match in a file as a term and computes an approximation to
// BM25. When enabled, all other scoring signals are ignored, including document ranks.
UseBM25Scoring bool

// Trace turns on opentracing for this request if true and if the Jaeger address was provided as
// a command-line flag
Expand Down Expand Up @@ -1015,7 +1015,7 @@ func (s *SearchOptions) String() string {
addBool("Whole", s.Whole)
addBool("ChunkMatches", s.ChunkMatches)
addBool("UseDocumentRanks", s.UseDocumentRanks)
addBool("UseKeywordScoring", s.UseKeywordScoring)
addBool("UseBM25Scoring", s.UseBM25Scoring)
addBool("Trace", s.Trace)
addBool("DebugScore", s.DebugScore)

Expand Down
4 changes: 2 additions & 2 deletions api_proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ func SearchOptionsFromProto(p *proto.SearchOptions) *SearchOptions {
DocumentRanksWeight: p.GetDocumentRanksWeight(),
Trace: p.GetTrace(),
DebugScore: p.GetDebugScore(),
UseKeywordScoring: p.GetUseKeywordScoring(),
UseBM25Scoring: p.GetUseKeywordScoring(),
}
}

Expand All @@ -725,6 +725,6 @@ func (s *SearchOptions) ToProto() *proto.SearchOptions {
DocumentRanksWeight: s.DocumentRanksWeight,
Trace: s.Trace,
DebugScore: s.DebugScore,
UseKeywordScoring: s.UseKeywordScoring,
UseKeywordScoring: s.UseBM25Scoring,
}
}
6 changes: 3 additions & 3 deletions build/scoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -625,9 +625,9 @@ func checkScoring(t *testing.T, c scoreCase, keywordScoring bool, parserType cta
defer ss.Close()

srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{
UseKeywordScoring: keywordScoring,
ChunkMatches: true,
DebugScore: true})
UseBM25Scoring: keywordScoring,
ChunkMatches: true,
DebugScore: true})
if err != nil {
t.Fatal(err)
}
Expand Down
4 changes: 2 additions & 2 deletions eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,12 @@ nextFileMatch:
finalCands := d.gatherMatches(nextDoc, mt, known, shouldMergeMatches)

if opts.ChunkMatches {
fileMatch.ChunkMatches = cp.fillChunkMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore)
fileMatch.ChunkMatches = cp.fillChunkMatches(finalCands, opts.NumContextLines, opts.UseBM25Scoring, fileMatch.Language, opts.DebugScore)
} else {
fileMatch.LineMatches = cp.fillMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore)
}

if opts.UseKeywordScoring {
if opts.UseBM25Scoring {
d.scoreFileUsingBM25(&fileMatch, nextDoc, finalCands, opts)
} else {
// Use the standard, non-experimental scoring method by default
Expand Down
6 changes: 3 additions & 3 deletions score.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ func (m *FileMatch) addScore(what string, computed float64, raw float64, debugSc
m.Score += computed
}

func (m *FileMatch) addKeywordScore(score float64, sumTf float64, L float64, debugScore bool) {
func (m *FileMatch) addBM25Score(score float64, sumTf float64, L float64, debugScore bool) {
if debugScore {
m.Debug += fmt.Sprintf("keyword-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L)
m.Debug += fmt.Sprintf("bm25-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L)
}
m.Score += score
}
Expand Down Expand Up @@ -160,5 +160,5 @@ func (d *indexData) scoreFileUsingBM25(fileMatch *FileMatch, doc uint32, cands [
score += ((k + 1.0) * tf) / (k*(1.0-b+b*L) + tf)
}

fileMatch.addKeywordScore(score, sumTf, L, opts.DebugScore)
fileMatch.addBM25Score(score, sumTf, L, opts.DebugScore)
}
2 changes: 1 addition & 1 deletion shards/shards_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,7 @@ func TestUseKeywordScoring(t *testing.T) {
&query.Substring{Pattern: "three"})

opts := zoekt.SearchOptions{
UseKeywordScoring: true,
UseBM25Scoring: true,
}

results, err := ss.Search(context.Background(), q, &opts)
Expand Down

0 comments on commit 761bd1f

Please sign in to comment.