diff --git a/contentprovider.go b/contentprovider.go
index b1fe0ab8..68c41cfe 100644
--- a/contentprovider.go
+++ b/contentprovider.go
@@ -169,6 +169,12 @@ func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int,
 	return result
 }
 
+// fillChunkMatches converts the internal candidateMatch slice into our APIs ChunkMatch.
+//
+// Performance invariant: ms is sorted and non-overlapping.
+//
+// Note: the byte slices may be backed by mmapped data, so before being
+// returned by the API it needs to be copied.
 func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch {
 	var result []ChunkMatch
 	if ms[0].fileName {
@@ -290,8 +296,20 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin
 
 func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int) []ChunkMatch {
 	newlines := p.newlines()
-	chunks := chunkCandidates(ms, newlines, numContextLines)
 	data := p.data(false)
+
+	// columnHelper prevents O(len(ms) * len(data)) lookups for all columns.
+	// However, it depends on ms being sorted by byteOffset and non-overlapping.
+	// This invariant is true at the time of writing, but we conservatively
+	// enforce this. Note: chunkCandidates preserves the sorting so safe to
+	// transform now.
+	columnHelper := columnHelper{data: data}
+	if !sort.IsSorted((sortByOffsetSlice)(ms)) {
+		log.Printf("WARN: performance invariant violated. candidate matches are not sorted in fillContentChunkMatches. Report to developers.")
+		sort.Sort((sortByOffsetSlice)(ms))
+	}
+
+	chunks := chunkCandidates(ms, newlines, numContextLines)
 	chunkMatches := make([]ChunkMatch, 0, len(chunks))
 	for _, chunk := range chunks {
 		ranges := make([]Range, 0, len(chunk.candidates))
@@ -306,12 +324,12 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte
 				Start: Location{
 					ByteOffset: startOffset,
 					LineNumber: uint32(startLine),
-					Column:     uint32(utf8.RuneCount(data[startLineOffset:startOffset]) + 1),
+					Column:     columnHelper.get(startLineOffset, startOffset),
 				},
 				End: Location{
 					ByteOffset: endOffset,
 					LineNumber: uint32(endLine),
-					Column:     uint32(utf8.RuneCount(data[endLineOffset:endOffset]) + 1),
+					Column:     columnHelper.get(endLineOffset, endOffset),
 				},
 			})
 
@@ -361,6 +379,9 @@ type candidateChunk struct {
 // chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent
 // chunks will be merged if adding `numContextLines` to the beginning and end of the chunk would cause
 // it to overlap with an adjacent chunk.
+//
+// input invariants: ms is sorted by byteOffset and is non overlapping with respect to endOffset.
+// output invariants: if you flatten candidates the input invariant is retained.
 func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines int) []candidateChunk {
 	var chunks []candidateChunk
 	for _, m := range ms {
@@ -392,6 +413,41 @@ func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines in
 	return chunks
 }
 
+// columnHelper is a helper struct which caches the number of runes last
+// counted. If we naively use utf8.RuneCount for each match on a line, this
+// leads to an O(nm) algorithm where m is the number of matches and n is the
+// length of the line. Aassuming we our candidates are increasing in offset
+// makes this operation O(n) instead.
+type columnHelper struct {
+	data []byte
+
+	// 0 values for all these are valid values
+	lastLineOffset int
+	lastOffset     uint32
+	lastRuneCount  uint32
+}
+
+// get returns the line column for offset. offset is the byte offset of the
+// rune in data. lineOffset is the byte offset inside of data for the line
+// containing offset.
+func (c *columnHelper) get(lineOffset int, offset uint32) uint32 {
+	var runeCount uint32
+
+	if lineOffset == c.lastLineOffset && offset >= c.lastOffset {
+		// Can count from last calculation
+		runeCount = c.lastRuneCount + uint32(utf8.RuneCount(c.data[c.lastOffset:offset]))
+	} else {
+		// Need to count from the beginning of line
+		runeCount = uint32(utf8.RuneCount(c.data[lineOffset:offset]))
+	}
+
+	c.lastLineOffset = lineOffset
+	c.lastOffset = offset
+	c.lastRuneCount = runeCount
+
+	return runeCount + 1
+}
+
 type newlines struct {
 	// locs is the sorted set of byte offsets of the newlines in the file
 	locs []uint32
diff --git a/contentprovider_test.go b/contentprovider_test.go
index deb31b09..d811f788 100644
--- a/contentprovider_test.go
+++ b/contentprovider_test.go
@@ -4,6 +4,8 @@ import (
 	"bytes"
 	"fmt"
 	"testing"
+	"testing/quick"
+	"unicode/utf8"
 
 	"github.com/google/go-cmp/cmp"
 )
@@ -327,3 +329,81 @@ func TestChunkMatches(t *testing.T) {
 		})
 	}
 }
+
+func BenchmarkColumnHelper(b *testing.B) {
+	// We simulate looking up columns of evenly spaced matches
+	const matches = 10_000
+	const match = "match"
+	const space = "         "
+	const dist = uint32(len(match) + len(space))
+	data := bytes.Repeat([]byte(match+space), matches)
+
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		columnHelper := columnHelper{data: data}
+
+		lineOffset := 0
+		offset := uint32(0)
+		for offset < uint32(len(data)) {
+			col := columnHelper.get(lineOffset, offset)
+			if col != offset+1 {
+				b.Fatal("column is not offset even though data is ASCII")
+			}
+			offset += dist
+		}
+	}
+}
+
+func TestColumnHelper(t *testing.T) {
+	f := func(line0, line1 string) bool {
+		data := []byte(line0 + line1)
+		lineOffset := len(line0)
+
+		columnHelper := columnHelper{data: data}
+
+		// We check every second rune returns the correct answer
+		offset := lineOffset
+		column := 1
+		for offset < len(data) {
+			if column%2 == 0 {
+				got := columnHelper.get(lineOffset, uint32(offset))
+				if got != uint32(column) {
+					return false
+				}
+			}
+			_, size := utf8.DecodeRune(data[offset:])
+			offset += size
+			column++
+		}
+
+		return true
+	}
+
+	if err := quick.Check(f, nil); err != nil {
+		t.Fatal(err)
+	}
+
+	// Corner cases
+
+	// empty data, shouldn't happen but just in case it slips through
+	ch := columnHelper{data: nil}
+	if got := ch.get(0, 0); got != 1 {
+		t.Fatal("empty data didn't return 1", got)
+	}
+
+	// Repeating a call to get should return the same value
+	// empty data, shouldn't happen but just in case it slips through
+	ch = columnHelper{data: []byte("hello\nworld")}
+	if got := ch.get(6, 8); got != 3 {
+		t.Fatal("unexpected value for third column on second line", got)
+	}
+	if got := ch.get(6, 8); got != 3 {
+		t.Fatal("unexpected value for repeated call for third column on second line", got)
+	}
+
+	// Now make sure if we go backwards we do not incorrectly use the cache
+	if got := ch.get(6, 6); got != 1 {
+		t.Fatal("unexpected value for backwards call for first column on second line", got)
+	}
+}
diff --git a/eval.go b/eval.go
index 6d938c63..902d0618 100644
--- a/eval.go
+++ b/eval.go
@@ -332,6 +332,9 @@ nextFileMatch:
 			}
 		}
 
+		// Important invariant for performance: finalCands is sorted by offset and
+		// non-overlapping. gatherMatches respects this invariant and all later
+		// transformations respect this.
 		shouldMergeMatches := !opts.ChunkMatches
 		finalCands := gatherMatches(mt, known, shouldMergeMatches)