From cdb166526693c2c8c42bfc33700e622d57796590 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith <keegan.csmith@gmail.com>
Date: Thu, 25 Jan 2024 16:45:05 +0200
Subject: [PATCH] eval: prefer longer candidateMatch when removing overlaps
 (#727)

When thinking about transforming queries like 'foo bar' into '(foo bar)
or "foo bar"' we would want to keep the phrase candidateMatch and not
throw it away in gatherMatches. By sorting longer matches before others
that start at the same offset we end up keeping those.

Note: this only affects ChunkMatch, since for LineMatch we merge when we
find overlaps.

Test Plan: This was quite hard to test with our existing e2e tests
because they only record matching lines, not offsets. So instead I am
just relying on the fact that we didn't break anything; once we add
proper support for phrases we will have a test that covers this.
---
 eval.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/eval.go b/eval.go
index 902d06181..7808f733b 100644
--- a/eval.go
+++ b/eval.go
@@ -537,6 +537,10 @@ type sortByOffsetSlice []*candidateMatch
 func (m sortByOffsetSlice) Len() int      { return len(m) }
 func (m sortByOffsetSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
 func (m sortByOffsetSlice) Less(i, j int) bool {
+	if m[i].byteOffset == m[j].byteOffset { // tie-break: both candidates start at the same offset
+		// Sort the longer candidate first so that overlap removal keeps it over the shorter one.
+		return m[i].byteMatchSz > m[j].byteMatchSz
+	}
 	return m[i].byteOffset < m[j].byteOffset
 }