Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
Search: solidify conversion to Zoekt queries (#60349)
Browse files Browse the repository at this point in the history
For the 'search-content-based-lang-detection' feature, this PR improves how filters are converted to Zoekt queries:
* Only convert filters to Zoekt lang filters, instead of also including file filters
* Handle excluded filters as well, for example `-lang:go`
* Use 'and' instead of 'or' to combine multiple filters
  • Loading branch information
jtibshirani authored Feb 9, 2024
1 parent bb926a6 commit bec5621
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 79 deletions.
48 changes: 25 additions & 23 deletions internal/search/zoekt/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,30 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
}
}

var and []zoekt.Q
if q != nil {
and = append(and, q)
}

// Handle file: and -file: filters.
filesInclude, filesExclude := b.IncludeExcludeValues(query.FieldFile)

// Handle lang: and -lang: filters.
// By default, languages are converted to file filters. When the 'search-content-based-lang-detection'
// feature is enabled, we use Zoekt's native language filters, which are based on the actual language
// of the file (as determined by go-enry).
langInclude, langExclude := b.IncludeExcludeValues(query.FieldLang)
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)

var and []zoekt.Q
if q != nil {
and = append(and, q)
if feat.ContentBasedLangFilters {
for _, lang := range langInclude {
and = append(and, toLangFilter(lang))
}
for _, lang := range langExclude {
filter := toLangFilter(lang)
and = append(and, &zoekt.Not{Child: filter})
}
} else {
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)
}

// zoekt also uses regular expressions for file paths
Expand Down Expand Up @@ -68,26 +82,14 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
and = append(and, zoekt.NewAnd(repoHasFilters...))
}

// Languages are already partially expressed with IncludePatterns, but Zoekt creates
// more precise language metadata based on file contents analyzed by go-enry, so it's
// useful to pass lang: queries down.
//
// Currently, negated lang queries create filename-based ExcludePatterns that cannot be
// corrected by the more precise language metadata. If this is a problem, indexed search
// queries should have a special query converter that produces *only* Language predicates
// instead of filepatterns.
if len(langInclude) > 0 && feat.ContentBasedLangFilters {
or := &zoekt.Or{}
for _, lang := range langInclude {
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
or.Children = append(or.Children, &zoekt.Language{Language: lang})
}
and = append(and, or)
}

return zoekt.Simplify(zoekt.NewAnd(and...)), nil
}

func toLangFilter(lang string) zoekt.Q {
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
return &zoekt.Language{Language: lang}
}

func QueryForFileContentArgs(opt query.RepoHasFileContentArgs, caseSensitive bool) zoekt.Q {
var children []zoekt.Q
if opt.Path != "" {
Expand Down
139 changes: 83 additions & 56 deletions internal/search/zoekt/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,91 +16,118 @@ import (

func TestQueryToZoektQuery(t *testing.T) {
cases := []struct {
Name string
Type search.IndexedRequestType
Pattern string
Features search.Features
Query string
Name string
Type search.IndexedRequestType
Query string
Features search.Features
WantZoektQuery string
}{
{
Name: "substr",
Type: search.TextRequest,
Pattern: `foo patterntype:regexp`,
Query: "foo case:no",
Name: "substr",
Type: search.TextRequest,
Query: `foo patterntype:regexp`,
WantZoektQuery: "foo case:no",
},
{
Name: "symbol substr",
Type: search.SymbolRequest,
Pattern: `foo patterntype:regexp type:symbol`,
Query: "sym:foo case:no",
Name: "symbol substr",
Type: search.SymbolRequest,
Query: `foo patterntype:regexp type:symbol`,
WantZoektQuery: "sym:foo case:no",
},
{
Name: "regex",
Type: search.TextRequest,
Pattern: `(foo).*?(bar) patterntype:regexp`,
Query: "(foo).*?(bar) case:no",
Name: "regex",
Type: search.TextRequest,
Query: `(foo).*?(bar) patterntype:regexp`,
WantZoektQuery: "(foo).*?(bar) case:no",
},
{
Name: "path",
Type: search.TextRequest,
Pattern: `foo file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
Query: `foo case:no f:\.go$ f:\.yaml$ -f:\bvendor\b`,
Name: "path",
Type: search.TextRequest,
Query: `foo file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
WantZoektQuery: `foo case:no f:\.go$ f:\.yaml$ -f:\bvendor\b`,
},
{
Name: "case",
Type: search.TextRequest,
Pattern: `foo case:yes patterntype:regexp file:\.go$ file:yaml`,
Query: `foo case:yes f:\.go$ f:yaml`,
Name: "case",
Type: search.TextRequest,
Query: `foo case:yes patterntype:regexp file:\.go$ file:yaml`,
WantZoektQuery: `foo case:yes f:\.go$ f:yaml`,
},
{
Name: "casepath",
Type: search.TextRequest,
Pattern: `foo case:yes file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
Query: `foo case:yes f:\.go$ f:\.yaml$ -f:\bvendor\b`,
Name: "casepath",
Type: search.TextRequest,
Query: `foo case:yes file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
WantZoektQuery: `foo case:yes f:\.go$ f:\.yaml$ -f:\bvendor\b`,
},
{
Name: "path matches only",
Type: search.TextRequest,
Pattern: `test type:path`,
Query: `f:test`,
Name: "path matches only",
Type: search.TextRequest,
Query: `test type:path`,
WantZoektQuery: `f:test`,
},
{
Name: "content matches only",
Type: search.TextRequest,
Pattern: `test type:file patterntype:literal`,
Query: `c:test`,
Name: "content matches only",
Type: search.TextRequest,
Query: `test type:file patterntype:literal`,
WantZoektQuery: `c:test`,
},
{
Name: "content and path matches",
Type: search.TextRequest,
Pattern: `test`,
Query: `test`,
Name: "content and path matches",
Type: search.TextRequest,
Query: `test`,
WantZoektQuery: `test`,
},
{
Name: "Just file",
Type: search.TextRequest,
Pattern: `file:\.go$`,
Query: `file:"\\.go(?m:$)"`,
Name: "Just file",
Type: search.TextRequest,
Query: `file:\.go$`,
WantZoektQuery: `file:"\\.go(?m:$)"`,
},
{
Name: "Languages is ignored",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
Name: "Languages get passed as file filter",
Type: search.TextRequest,
Query: `file:\.go$ lang:go`,
WantZoektQuery: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
},
{
Name: "language gets passed as both file include and lang: predicate",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Name: "Language get passed as lang: query",
Type: search.TextRequest,
Query: `lang:go`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)" lang:Go`,
WantZoektQuery: `lang:go`,
},
{
Name: "Multiple languages get passed as lang queries",
Type: search.TextRequest,
Query: `lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
WantZoektQuery: `lang:go lang:typescript`,
},
{
Name: "Excluded languages get passed as lang: query",
Type: search.TextRequest,
Query: `lang:go -lang:typescript -lang:markdown`,
Features: search.Features{
ContentBasedLangFilters: true,
},
WantZoektQuery: `lang:go -lang:typescript -lang:markdown`,
},
{
Name: "Mixed file and lang filters",
Type: search.TextRequest,
Query: `file:\.go$ lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
WantZoektQuery: `file:"\\.go(?m:$)" lang:go lang:typescript`,
},
}
for _, tt := range cases {
t.Run(tt.Name, func(t *testing.T) {
sourceQuery, _ := query.ParseRegexp(tt.Pattern)
sourceQuery, _ := query.ParseRegexp(tt.Query)
b, _ := query.ToBasicQuery(sourceQuery)

types, _ := b.ToParseTree().StringValues(query.FieldType)
Expand All @@ -110,9 +137,9 @@ func TestQueryToZoektQuery(t *testing.T) {
t.Fatal("QueryToZoektQuery failed:", err)
}

zoektQuery, err := zoekt.Parse(tt.Query)
zoektQuery, err := zoekt.Parse(tt.WantZoektQuery)
if err != nil {
t.Fatalf("failed to parse %q: %v", tt.Query, err)
t.Fatalf("failed to parse %q: %v", tt.WantZoektQuery, err)
}

if !queryEqual(got, zoektQuery) {
Expand Down

0 comments on commit bec5621

Please sign in to comment.