From cdb166526693c2c8c42bfc33700e622d57796590 Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Thu, 25 Jan 2024 16:45:05 +0200 Subject: [PATCH] eval: prefer longer candidateMatch when removing overlaps (#727) When thinking about transforming queries like 'foo bar' into '(foo bar) or "foo bar"', we want to keep the phrase candidateMatch rather than throw it away in gatherMatches. By sorting longer matches before others that start at the same offset, we end up keeping the longer ones. Note: this only affects ChunkMatch, since for LineMatch we merge when we find overlaps. Test Plan: This was quite hard to test with our existing e2e tests because they do not record offsets, only matching lines. So instead I am relying on the fact that we didn't break anything; once we add proper support for phrases, we will have a test for this. --- eval.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eval.go b/eval.go index 902d06181..7808f733b 100644 --- a/eval.go +++ b/eval.go @@ -537,6 +537,10 @@ type sortByOffsetSlice []*candidateMatch func (m sortByOffsetSlice) Len() int { return len(m) } func (m sortByOffsetSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m sortByOffsetSlice) Less(i, j int) bool { + if m[i].byteOffset == m[j].byteOffset { // tie break if same offset + // Prefer longer candidates if starting at same position + return m[i].byteMatchSz > m[j].byteMatchSz + } return m[i].byteOffset < m[j].byteOffset }