Skip to content

Commit

Permalink
rfc: switch to enum for matches signature
Browse files Browse the repository at this point in the history
I am looking for feedback on this change before improving
documentation/etc. Any feedback if this is worthwhile or on naming
please.

Test Plan: go test ./...
  • Loading branch information
keegancsmith committed Nov 14, 2023
1 parent e068116 commit 06e9071
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 70 deletions.
15 changes: 8 additions & 7 deletions eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,15 +291,16 @@ nextFileMatch:
md := d.repoMetaData[d.repos[nextDoc]]

for cost := costMin; cost <= costMax; cost++ {
v, ok := mt.matches(cp, cost, known)
if ok && !v {
switch mt.matches(cp, cost, known) {
case matchesRequiresHigherCost:
if cost == costMax {
log.Panicf("did not decide. Repo %s, doc %d, known %v",
md.Name, nextDoc, known)
}
case matchesFound:
case matchesNone:
continue nextFileMatch
}

if cost == costMax && !ok {
log.Panicf("did not decide. Repo %s, doc %d, known %v",
md.Name, nextDoc, known)
}
}

fileMatch := FileMatch{
Expand Down
4 changes: 2 additions & 2 deletions matchiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ func (t *noMatchTree) nextDoc() uint32 {

func (t *noMatchTree) prepare(uint32) {}

func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
return false, true
func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
return matchesNone
}

func (t *noMatchTree) updateStats(s *Stats) {
Expand Down
158 changes: 97 additions & 61 deletions matchtree.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,25 @@ const (
costMax = costRegexp
)

type matchesState uint8

const (
matchesRequiresHigherCost matchesState = iota
matchesFound
matchesNone
)

func matchesStatePred(b bool) matchesState {
if b {
return matchesFound
}
return matchesNone
}

func matchesStateForSlice[T any](v []T) matchesState {
return matchesStatePred(len(v) > 0)
}

// An expression tree coupled with matches. The matchtree has two
// functions:
//
Expand Down Expand Up @@ -81,7 +100,7 @@ type matchTree interface {
docIterator

// returns whether this matches, and if we are sure.
matches(cp *contentProvider, cost int, known map[matchTree]bool) (match bool, sure bool)
matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState
}

// docMatchTree iterates over documents for which predicate(docID) returns true.
Expand Down Expand Up @@ -200,13 +219,13 @@ func (t *symbolRegexpMatchTree) prepare(doc uint32) {
t.matchTree.prepare(doc)
}

func (t *symbolRegexpMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
func (t *symbolRegexpMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
if t.reEvaluated {
return len(t.found) > 0, true
return matchesStateForSlice(t.found)
}

if cost < costRegexp {
return false, false
return matchesRequiresHigherCost
}

sections := cp.docSections()
Expand Down Expand Up @@ -235,7 +254,7 @@ func (t *symbolRegexpMatchTree) matches(cp *contentProvider, cost int, known map
t.found = found
t.reEvaluated = true

return len(t.found) > 0, true
return matchesStateForSlice(t.found)
}

type symbolSubstrMatchTree struct {
Expand Down Expand Up @@ -564,23 +583,25 @@ func visitMatches(t matchTree, known map[matchTree]bool, f func(matchTree)) {

// all matches() methods.

func (t *docMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
return t.predicate(cp.idx), true
func (t *docMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
return matchesStatePred(t.predicate(cp.idx))
}

func (t *bruteForceMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
return true, true
func (t *bruteForceMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
return matchesFound
}

// andLineMatchTree is a performance optimization of andMatchTree. For content
// searches we don't want to run the regex engine if there is no line that
// contains matches from all terms.
func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
matches, sure := t.andMatchTree.matches(cp, cost, known)
if !(sure && matches) {
return matches, sure
func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
if state := t.andMatchTree.matches(cp, cost, known); state != matchesFound {
return state
}

// Invariant: all children have matches. If any line contains all of them we
// can return MatchesFound.

// find child with fewest candidates
min := maxUInt32
fewestChildren := 0
Expand All @@ -589,7 +610,7 @@ func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matc
// make sure we are running a content search and that all candidates are a
// substrMatchTree
if !ok || v.fileName {
return matches, sure
return matchesFound
}
if len(v.current) < min {
min = len(v.current)
Expand Down Expand Up @@ -648,56 +669,63 @@ nextLine:
}
// return early once we found any line that contains matches from all children
if hits == len(t.children) {
return matches, true
return matchesFound
}
}
return false, true
return matchesNone
}

func (t *andMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
func (t *andMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
sure := true

for _, ch := range t.children {
v, ok := evalMatchTree(cp, cost, known, ch)
if ok && !v {
return false, true
}
if !ok {
switch evalMatchTree(cp, cost, known, ch) {
case matchesRequiresHigherCost:
// keep evaluating other children incase we come across matchesNone
sure = false
case matchesFound:
// will return this if every child has this value
case matchesNone:
return matchesNone
}
}

return true, sure
if sure {
return matchesFound
}
return matchesRequiresHigherCost
}

func (t *orMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
matches := false
sure := true
func (t *orMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
// we could short-circuit, but we want to use the other possibilities as a
// ranking signal. So we always return the most conservative state.
state := matchesNone
for _, ch := range t.children {
v, ok := evalMatchTree(cp, cost, known, ch)
if ok {
// we could short-circuit, but we want to use
// the other possibilities as a ranking
// signal.
matches = matches || v
} else {
sure = false
switch evalMatchTree(cp, cost, known, ch) {
case matchesRequiresHigherCost:
state = matchesRequiresHigherCost
case matchesFound:
if state != matchesRequiresHigherCost {
state = matchesFound
}
case matchesNone:
// noop
}
}
return matches, sure
return state
}

func (t *branchQueryMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
return t.branchMask() != 0, true
func (t *branchQueryMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
return matchesStatePred(t.branchMask() != 0)
}

func (t *regexpMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
func (t *regexpMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
if t.reEvaluated {
return len(t.found) > 0, true
return matchesStateForSlice(t.found)
}

if cost < costRegexp {
return false, false
return matchesRequiresHigherCost
}

cp.stats.RegexpsConsidered++
Expand All @@ -715,16 +743,16 @@ func (t *regexpMatchTree) matches(cp *contentProvider, cost int, known map[match
t.found = found
t.reEvaluated = true

return len(t.found) > 0, true
return matchesStateForSlice(t.found)
}

func (t *wordMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
func (t *wordMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
if t.evaluated {
return len(t.found) > 0, true
return matchesStateForSlice(t.found)
}

if cost < costRegexp {
return false, false
return matchesRequiresHigherCost
}

data := cp.data(t.fileName)
Expand Down Expand Up @@ -754,7 +782,7 @@ func (t *wordMatchTree) matches(cp *contentProvider, cost int, known map[matchTr
t.found = found
t.evaluated = true

return len(t.found) > 0, true
return matchesStateForSlice(t.found)
}

// breakMatchesOnNewlines returns matches resulting from breaking each element
Expand Down Expand Up @@ -792,43 +820,51 @@ func breakOnNewlines(cm *candidateMatch, text []byte) []*candidateMatch {
return cms
}

func evalMatchTree(cp *contentProvider, cost int, known map[matchTree]bool, mt matchTree) (bool, bool) {
func evalMatchTree(cp *contentProvider, cost int, known map[matchTree]bool, mt matchTree) matchesState {
if v, ok := known[mt]; ok {
return v, true
return matchesStatePred(v)
}

v, ok := mt.matches(cp, cost, known)
if ok {
known[mt] = v
ms := mt.matches(cp, cost, known)
if ms != matchesRequiresHigherCost {
known[mt] = ms == matchesFound
}

return v, ok
return ms
}

func (t *notMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
v, ok := evalMatchTree(cp, cost, known, t.child)
return !v, ok
func (t *notMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
switch evalMatchTree(cp, cost, known, t.child) {
case matchesRequiresHigherCost:
return matchesRequiresHigherCost
case matchesFound:
return matchesNone
case matchesNone:
return matchesFound
default:
panic("unreachable")
}
}

func (t *fileNameMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
func (t *fileNameMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
return evalMatchTree(cp, cost, known, t.child)
}

func (t *substrMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) {
func (t *substrMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState {
if t.contEvaluated {
return len(t.current) > 0, true
return matchesStateForSlice(t.current)
}

if len(t.current) == 0 {
return false, true
return matchesNone
}

if t.fileName && cost < costMemory {
return false, false
return matchesRequiresHigherCost
}

if !t.fileName && cost < costContent {
return false, false
return matchesRequiresHigherCost
}

pruned := t.current[:0]
Expand All @@ -843,7 +879,7 @@ func (t *substrMatchTree) matches(cp *contentProvider, cost int, known map[match
t.current = pruned
t.contEvaluated = true

return len(t.current) > 0, true
return matchesStateForSlice(t.current)
}

type matchTreeOpt struct {
Expand Down

0 comments on commit 06e9071

Please sign in to comment.