diff --git a/matchtree.go b/matchtree.go index ccd648d4a..2bc1f66cc 100644 --- a/matchtree.go +++ b/matchtree.go @@ -183,6 +183,10 @@ type noVisitMatchTree struct { type regexpMatchTree struct { regexp *regexp.Regexp + // origRegexp is the original parsed regexp from the query structure. It + // does not include mutations such as case sensitivity. + origRegexp *syntax.Regexp + fileName bool // mutable @@ -200,8 +204,9 @@ func newRegexpMatchTree(s *query.Regexp) *regexpMatchTree { } return ®expMatchTree{ - regexp: regexp.MustCompile(prefix + s.Regexp.String()), - fileName: s.FileName, + regexp: regexp.MustCompile(prefix + s.Regexp.String()), + origRegexp: s.Regexp, + fileName: s.FileName, } } @@ -1107,19 +1112,19 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) }, nil } - var regexp *regexp.Regexp + var regexpMT *regexpMatchTree visitMatchTree(subMT, func(mt matchTree) { if t, ok := mt.(*regexpMatchTree); ok { - regexp = t.regexp + regexpMT = t } }) - if regexp == nil { + if regexpMT == nil { return nil, fmt.Errorf("found %T inside query.Symbol", subMT) } return &symbolRegexpMatchTree{ - regexp: regexp, - all: regexp.String() == "(?i)(?-s:.*)", + regexp: regexpMT.regexp, + all: isRegexpAll(regexpMT.origRegexp), matchTree: subMT, }, nil @@ -1376,3 +1381,26 @@ func pruneMatchTree(mt matchTree) (matchTree, error) { } return mt, err } + +// isRegexpAll returns true if the query matches all possible lines. +// +// Note: it is possible for a funky regex to actually match all but this +// returns false. This returns true for normal looking regexes like ".*" or +// "(?-s:.*)". +func isRegexpAll(r *syntax.Regexp) bool { + // Note: we don't care about any flags since we are looking for .* and we + // don't mind if . matches all or everything but newline. + + // Our main target: .* + if r.Op == syntax.OpStar && len(r.Sub) == 1 { // * + // (?s:.) or (?-s:.) + return r.Sub[0].Op == syntax.OpAnyChar || r.Sub[0].Op == syntax.OpAnyCharNotNL + } + + // Strip away expressions being wrapped in paranthesis + if (r.Op == syntax.OpCapture || r.Op == syntax.OpConcat) && len(r.Sub) == 1 { + return isRegexpAll(r.Sub[0]) + } + + return false +} diff --git a/matchtree_test.go b/matchtree_test.go index 9a7362d92..0f56f7623 100644 --- a/matchtree_test.go +++ b/matchtree_test.go @@ -16,6 +16,7 @@ package zoekt import ( "reflect" + "regexp/syntax" "testing" "github.com/RoaringBitmap/roaring" @@ -385,3 +386,38 @@ func TestRepoIDs(t *testing.T) { t.Fatalf("expected %d document, but got at least 1 more", len(want)) } } + +func TestIsRegexpAll(t *testing.T) { + valid := []string{ + ".*", + "(.*)", + "(?-s:.*)", + "(?s:.*)", + "(?i)(?-s:.*)", + } + invalid := []string{ + ".", + "foo", + "(foo.*)", + } + + for _, s := range valid { + r, err := syntax.Parse(s, syntax.Perl) + if err != nil { + t.Fatal(err) + } + if !isRegexpAll(r) { + t.Errorf("expected %q to match all", s) + } + } + + for _, s := range invalid { + r, err := syntax.Parse(s, syntax.Perl) + if err != nil { + t.Fatal(err) + } + if isRegexpAll(r) { + t.Errorf("expected %q to not match all", s) + } + } +}