Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Search: solidify conversion to Zoekt queries #60349

Merged
merged 7 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 25 additions & 23 deletions internal/search/zoekt/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,30 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
}
}

var and []zoekt.Q
if q != nil {
and = append(and, q)
}

// Handle file: and -file: filters.
filesInclude, filesExclude := b.IncludeExcludeValues(query.FieldFile)

// Handle lang: and -lang: filters.
// By default, languages are converted to file filters. When the 'search-content-based-lang-detection'
// feature is enabled, we use Zoekt's native language filters, which are based on the actual language
// of the file (as determined by go-enry).
langInclude, langExclude := b.IncludeExcludeValues(query.FieldLang)
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)

var and []zoekt.Q
if q != nil {
and = append(and, q)
if feat.ContentBasedLangFilters {
for _, lang := range langInclude {
and = append(and, toLangFilter(lang))
}
for _, lang := range langExclude {
filter := toLangFilter(lang)
and = append(and, &zoekt.Not{Child: filter})
}
} else {
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)
}

// zoekt also uses regular expressions for file paths
Expand Down Expand Up @@ -68,26 +82,14 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
and = append(and, zoekt.NewAnd(repoHasFilters...))
}

// Languages are already partially expressed with IncludePatterns, but Zoekt creates
// more precise language metadata based on file contents analyzed by go-enry, so it's
// useful to pass lang: queries down.
//
// Currently, negated lang queries create filename-based ExcludePatterns that cannot be
// corrected by the more precise language metadata. If this is a problem, indexed search
// queries should have a special query converter that produces *only* Language predicates
// instead of filepatterns.
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
if len(langInclude) > 0 && feat.ContentBasedLangFilters {
or := &zoekt.Or{}
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
for _, lang := range langInclude {
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
or.Children = append(or.Children, &zoekt.Language{Language: lang})
}
and = append(and, or)
}

return zoekt.Simplify(zoekt.NewAnd(and...)), nil
}

// toLangFilter builds the Zoekt language predicate for a single lang: value.
// The value is passed through enry.GetLanguageByAlias and the name it returns
// is what gets stored in the resulting zoekt.Language query.
func toLangFilter(lang string) zoekt.Q {
	// Invariant: lang is valid, so the lookup's second return value is
	// deliberately discarded.
	canonical, _ := enry.GetLanguageByAlias(lang)
	return &zoekt.Language{Language: canonical}
}

func QueryForFileContentArgs(opt query.RepoHasFileContentArgs, caseSensitive bool) zoekt.Q {
var children []zoekt.Q
if opt.Path != "" {
Expand Down
35 changes: 31 additions & 4 deletions internal/search/zoekt/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,46 @@ func TestQueryToZoektQuery(t *testing.T) {
Query: `file:"\\.go(?m:$)"`,
},
{
Name: "Languages is ignored",
Name: "Languages get passed as file filter",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
},
{
Name: "language gets passed as both file include and lang: predicate",
Name: "Language get passed as lang: query",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Pattern: `lang:go`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `lang:Go`,
},
{
Name: "Multiple languages get passed as lang queries",
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
Type: search.TextRequest,
Pattern: `lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `lang:Go lang:Typescript`,
},
{
Name: "Excluded languages get passed as lang: query",
Type: search.TextRequest,
Pattern: `lang:go -lang:typescript -lang:markdown`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `lang:Go -lang:Typescript -lang:markdown`,
},
{
Name: "Mixed file and lang filters",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)" lang:Go`,
Query: `file:"\\.go(?m:$)" lang:Go lang:Typescript`,
},
}
for _, tt := range cases {
Expand Down
Loading