Skip to content

Commit

Permalink
Increase filename boost (#785)
Browse files Browse the repository at this point in the history
When we introduced filename boosting in BM25, we set it to a very conservative
weight. This PR increases the weight from 2.0 to 5.0, which improves results on
relevant evals.

Relates to SPLF-88
  • Loading branch information
jtibshirani authored May 28, 2024
1 parent df7a7e7 commit 640102a
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions build/scoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func TestBM25(t *testing.T) {
content: exampleJava,
language: "Java",
// bm25-score:1.69 (sum-tf: 7.00, length-ratio: 2.00)
wantScore: 1.69,
wantScore: 1.82,
}, {
// Matches only on content
fileName: "example.java",
Expand All @@ -99,15 +99,15 @@ func TestBM25(t *testing.T) {
content: exampleJava,
language: "Java",
// bm25-score:1.07 (sum-tf: 2.00, length-ratio: 2.00)
wantScore: 1.07,
wantScore: 1.55,
},
{
// Matches only on filename, and content is missing
fileName: "a/b/c/config.go",
query: &query.Substring{Pattern: "config.go"},
language: "Go",
// bm25-score:1.91 (sum-tf: 2.00, length-ratio: 0.00)
wantScore: 1.91,
wantScore: 2.08,
},
}

Expand Down
2 changes: 1 addition & 1 deletion score.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (d *indexData) scoreFileUsingBM25(fileMatch *FileMatch, doc uint32, cands [
term := string(cand.substrLowered)

if cand.fileName {
termFreqs[term] += 2
termFreqs[term] += 5
} else {
termFreqs[term]++
}
Expand Down

0 comments on commit 640102a

Please sign in to comment.