From ecf149a2de8726851dd37edd4140d22cfc661f80 Mon Sep 17 00:00:00 2001 From: Stefan Hengl Date: Tue, 30 Apr 2024 16:32:16 +0200 Subject: [PATCH] fix: don't modify finalCands Since a recent change, we use finalCands in our BM25 scoring; however, finalCands is modified in fillChunkMatches, which led to surprising scores. Test plan: updated unit test --- build/scoring_test.go | 4 ++-- contentprovider.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build/scoring_test.go b/build/scoring_test.go index e3941fecd..9bf243fc3 100644 --- a/build/scoring_test.go +++ b/build/scoring_test.go @@ -77,8 +77,8 @@ func TestBM25(t *testing.T) { query: &query.Substring{Pattern: "example"}, content: exampleJava, language: "Java", - // keyword-score:1.63 (sum-tf: 6.00, length-ratio: 2.00) - wantScore: 1.63, + // keyword-score:1.69 (sum-tf: 7.00, length-ratio: 2.00) + wantScore: 1.69, }, { // Matches only on content fileName: "example.java", diff --git a/contentprovider.go b/contentprovider.go index c66b1f471..e4a1ce1db 100644 --- a/contentprovider.go +++ b/contentprovider.go @@ -147,7 +147,7 @@ func (p *contentProvider) findOffset(filename bool, r uint32) uint32 { // returned by the API it needs to be copied. func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch { var filenameMatches []*candidateMatch - contentMatches := ms[:0] + contentMatches := make([]*candidateMatch, 0, len(ms)) for _, m := range ms { if m.fileName { @@ -194,7 +194,7 @@ func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, // returned by the API it needs to be copied. func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch { var filenameMatches []*candidateMatch - contentMatches := ms[:0] + contentMatches := make([]*candidateMatch, 0, len(ms)) for _, m := range ms { if m.fileName {