Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Search: solidify conversion to Zoekt queries #60349

Merged
merged 7 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 25 additions & 23 deletions internal/search/zoekt/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,30 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
}
}

var and []zoekt.Q
if q != nil {
and = append(and, q)
}

// Handle file: and -file: filters.
filesInclude, filesExclude := b.IncludeExcludeValues(query.FieldFile)

// Handle lang: and -lang: filters.
// By default, languages are converted to file filters. When the 'search-content-based-lang-detection'
// feature is enabled, we use Zoekt's native language filters, which are based on the actual language
// of the file (as determined by go-enry).
langInclude, langExclude := b.IncludeExcludeValues(query.FieldLang)
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)

var and []zoekt.Q
if q != nil {
and = append(and, q)
if feat.ContentBasedLangFilters {
for _, lang := range langInclude {
and = append(and, toLangFilter(lang))
}
for _, lang := range langExclude {
filter := toLangFilter(lang)
and = append(and, &zoekt.Not{Child: filter})
}
} else {
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)
}

// zoekt also uses regular expressions for file paths
Expand Down Expand Up @@ -68,26 +82,14 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
and = append(and, zoekt.NewAnd(repoHasFilters...))
}

// Languages are already partially expressed with IncludePatterns, but Zoekt creates
// more precise language metadata based on file contents analyzed by go-enry, so it's
// useful to pass lang: queries down.
//
// Currently, negated lang queries create filename-based ExcludePatterns that cannot be
// corrected by the more precise language metadata. If this is a problem, indexed search
// queries should have a special query converter that produces *only* Language predicates
// instead of filepatterns.
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
if len(langInclude) > 0 && feat.ContentBasedLangFilters {
or := &zoekt.Or{}
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
for _, lang := range langInclude {
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
or.Children = append(or.Children, &zoekt.Language{Language: lang})
}
and = append(and, or)
}

return zoekt.Simplify(zoekt.NewAnd(and...)), nil
}

func toLangFilter(lang string) zoekt.Q {
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
return &zoekt.Language{Language: lang}
}

func QueryForFileContentArgs(opt query.RepoHasFileContentArgs, caseSensitive bool) zoekt.Q {
var children []zoekt.Q
if opt.Path != "" {
Expand Down
139 changes: 83 additions & 56 deletions internal/search/zoekt/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,91 +16,118 @@ import (

func TestQueryToZoektQuery(t *testing.T) {
cases := []struct {
Name string
Type search.IndexedRequestType
Pattern string
Features search.Features
Query string
Name string
Type search.IndexedRequestType
Query string
Features search.Features
WantZoektQuery string
}{
{
Name: "substr",
Type: search.TextRequest,
Pattern: `foo patterntype:regexp`,
Query: "foo case:no",
Name: "substr",
Type: search.TextRequest,
Query: `foo patterntype:regexp`,
WantZoektQuery: "foo case:no",
},
{
Name: "symbol substr",
Type: search.SymbolRequest,
Pattern: `foo patterntype:regexp type:symbol`,
Query: "sym:foo case:no",
Name: "symbol substr",
Type: search.SymbolRequest,
Query: `foo patterntype:regexp type:symbol`,
WantZoektQuery: "sym:foo case:no",
},
{
Name: "regex",
Type: search.TextRequest,
Pattern: `(foo).*?(bar) patterntype:regexp`,
Query: "(foo).*?(bar) case:no",
Name: "regex",
Type: search.TextRequest,
Query: `(foo).*?(bar) patterntype:regexp`,
WantZoektQuery: "(foo).*?(bar) case:no",
},
{
Name: "path",
Type: search.TextRequest,
Pattern: `foo file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
Query: `foo case:no f:\.go$ f:\.yaml$ -f:\bvendor\b`,
Name: "path",
Type: search.TextRequest,
Query: `foo file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
WantZoektQuery: `foo case:no f:\.go$ f:\.yaml$ -f:\bvendor\b`,
},
{
Name: "case",
Type: search.TextRequest,
Pattern: `foo case:yes patterntype:regexp file:\.go$ file:yaml`,
Query: `foo case:yes f:\.go$ f:yaml`,
Name: "case",
Type: search.TextRequest,
Query: `foo case:yes patterntype:regexp file:\.go$ file:yaml`,
WantZoektQuery: `foo case:yes f:\.go$ f:yaml`,
},
{
Name: "casepath",
Type: search.TextRequest,
Pattern: `foo case:yes file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
Query: `foo case:yes f:\.go$ f:\.yaml$ -f:\bvendor\b`,
Name: "casepath",
Type: search.TextRequest,
Query: `foo case:yes file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
WantZoektQuery: `foo case:yes f:\.go$ f:\.yaml$ -f:\bvendor\b`,
},
{
Name: "path matches only",
Type: search.TextRequest,
Pattern: `test type:path`,
Query: `f:test`,
Name: "path matches only",
Type: search.TextRequest,
Query: `test type:path`,
WantZoektQuery: `f:test`,
},
{
Name: "content matches only",
Type: search.TextRequest,
Pattern: `test type:file patterntype:literal`,
Query: `c:test`,
Name: "content matches only",
Type: search.TextRequest,
Query: `test type:file patterntype:literal`,
WantZoektQuery: `c:test`,
},
{
Name: "content and path matches",
Type: search.TextRequest,
Pattern: `test`,
Query: `test`,
Name: "content and path matches",
Type: search.TextRequest,
Query: `test`,
WantZoektQuery: `test`,
},
{
Name: "Just file",
Type: search.TextRequest,
Pattern: `file:\.go$`,
Query: `file:"\\.go(?m:$)"`,
Name: "Just file",
Type: search.TextRequest,
Query: `file:\.go$`,
WantZoektQuery: `file:"\\.go(?m:$)"`,
},
{
Name: "Languages is ignored",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
Name: "Languages get passed as file filter",
Type: search.TextRequest,
Query: `file:\.go$ lang:go`,
WantZoektQuery: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
},
{
Name: "language gets passed as both file include and lang: predicate",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Name: "Language get passed as lang: query",
Type: search.TextRequest,
Query: `lang:go`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)" lang:Go`,
WantZoektQuery: `lang:go`,
},
{
Name: "Multiple languages get passed as lang queries",
Type: search.TextRequest,
Query: `lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
WantZoektQuery: `lang:go lang:typescript`,
},
{
Name: "Excluded languages get passed as lang: query",
Type: search.TextRequest,
Query: `lang:go -lang:typescript -lang:markdown`,
Features: search.Features{
ContentBasedLangFilters: true,
},
WantZoektQuery: `lang:go -lang:typescript -lang:markdown`,
},
{
Name: "Mixed file and lang filters",
Type: search.TextRequest,
Query: `file:\.go$ lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
WantZoektQuery: `file:"\\.go(?m:$)" lang:go lang:typescript`,
},
}
for _, tt := range cases {
t.Run(tt.Name, func(t *testing.T) {
sourceQuery, _ := query.ParseRegexp(tt.Pattern)
sourceQuery, _ := query.ParseRegexp(tt.Query)
b, _ := query.ToBasicQuery(sourceQuery)

types, _ := b.ToParseTree().StringValues(query.FieldType)
Expand All @@ -110,9 +137,9 @@ func TestQueryToZoektQuery(t *testing.T) {
t.Fatal("QueryToZoektQuery failed:", err)
}

zoektQuery, err := zoekt.Parse(tt.Query)
zoektQuery, err := zoekt.Parse(tt.WantZoektQuery)
if err != nil {
t.Fatalf("failed to parse %q: %v", tt.Query, err)
t.Fatalf("failed to parse %q: %v", tt.WantZoektQuery, err)
}

if !queryEqual(got, zoektQuery) {
Expand Down
Loading