From 640102a4a30e3913b4d498a465bb2ecd4f9cb7a3 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 28 May 2024 14:51:34 -0700 Subject: [PATCH] Increase filename boost (#785) When we introduced filename boosting in BM25, we set it to a very conservative weight. This PR increases the weight from 2.0 -> 5.0, which improves results on relevant evals. Relates to SPLF-88 --- build/scoring_test.go | 6 +++--- score.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build/scoring_test.go b/build/scoring_test.go index 03a13928..e4e2e51e 100644 --- a/build/scoring_test.go +++ b/build/scoring_test.go @@ -78,7 +78,7 @@ func TestBM25(t *testing.T) { content: exampleJava, language: "Java", // bm25-score:1.69 (sum-tf: 7.00, length-ratio: 2.00) - wantScore: 1.69, + wantScore: 1.82, }, { // Matches only on content fileName: "example.java", @@ -99,7 +99,7 @@ func TestBM25(t *testing.T) { content: exampleJava, language: "Java", // bm25-score:1.07 (sum-tf: 2.00, length-ratio: 2.00) - wantScore: 1.07, + wantScore: 1.55, }, { // Matches only on filename, and content is missing @@ -107,7 +107,7 @@ func TestBM25(t *testing.T) { query: &query.Substring{Pattern: "config.go"}, language: "Go", // bm25-score:1.91 (sum-tf: 2.00, length-ratio: 0.00) - wantScore: 1.91, + wantScore: 2.08, }, } diff --git a/score.go b/score.go index dc0f4c19..9bcf1bbc 100644 --- a/score.go +++ b/score.go @@ -132,7 +132,7 @@ func (d *indexData) scoreFileUsingBM25(fileMatch *FileMatch, doc uint32, cands [ term := string(cand.substrLowered) if cand.fileName { - termFreqs[term] += 2 + termFreqs[term] += 5 } else { termFreqs[term]++ }