Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Search: solidify conversion to Zoekt queries #60349

Merged
merged 7 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 29 additions & 23 deletions internal/search/zoekt/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,30 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
}
}

var and []zoekt.Q
if q != nil {
and = append(and, q)
}

// Handle file: and -file: filters.
filesInclude, filesExclude := b.IncludeExcludeValues(query.FieldFile)
// Handle lang: and -lang: filters.
// Languages are already partially expressed with IncludePatterns, but Zoekt creates
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
// more precise language metadata based on file contents analyzed by go-enry, so it's
// useful to pass lang: queries down.
langInclude, langExclude := b.IncludeExcludeValues(query.FieldLang)
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)

var and []zoekt.Q
if q != nil {
and = append(and, q)
if feat.ContentBasedLangFilters {
if len(langInclude) > 0 {
filters := toLangFilters(langInclude)
and = append(and, &zoekt.Or{Children: filters})
}
if len(langExclude) > 0 {
filters := toLangFilters(langExclude)
and = append(and, &zoekt.Not{Child: &zoekt.Or{Children: filters}})
}
} else {
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)
}

// zoekt also uses regular expressions for file paths
Expand Down Expand Up @@ -68,26 +82,18 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
and = append(and, zoekt.NewAnd(repoHasFilters...))
}

// Languages are already partially expressed with IncludePatterns, but Zoekt creates
// more precise language metadata based on file contents analyzed by go-enry, so it's
// useful to pass lang: queries down.
//
// Currently, negated lang queries create filename-based ExcludePatterns that cannot be
// corrected by the more precise language metadata. If this is a problem, indexed search
// queries should have a special query converter that produces *only* Language predicates
// instead of filepatterns.
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
if len(langInclude) > 0 && feat.ContentBasedLangFilters {
or := &zoekt.Or{}
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
for _, lang := range langInclude {
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
or.Children = append(or.Children, &zoekt.Language{Language: lang})
}
and = append(and, or)
}

return zoekt.Simplify(zoekt.NewAnd(and...)), nil
}

func toLangFilters(langs []string) []zoekt.Q {
var filters []zoekt.Q
for _, lang := range langs {
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know Go doesn't have built-in assertions and it's documented why, but I think it would be helpful to assert that the second return value is true to catch potential bugs.

Copy link
Member Author

@jtibshirani jtibshirani Feb 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By "assert" do you mean check the value and return an error? Unfortunately that makes this quite verbose and less readable. For "runtime assertions", do we ever use panic? I haven't seen this used in our Go code, maybe it goes against the spirit of Go's design.

For now I didn't touch this (as it's a pre-existing issue) but would be interested in your thoughts!

filters = append(filters, &zoekt.Language{Language: lang})
}
return filters
}

func QueryForFileContentArgs(opt query.RepoHasFileContentArgs, caseSensitive bool) zoekt.Q {
var children []zoekt.Q
if opt.Path != "" {
Expand Down
24 changes: 18 additions & 6 deletions internal/search/zoekt/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,31 @@ func TestQueryToZoektQuery(t *testing.T) {
Query: `file:"\\.go(?m:$)"`,
},
{
Name: "Languages is ignored",
Name: "Languages get passed as lang: query",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
Pattern: `lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `lang:Go or lang:Typescript`,
},
{
Name: "Excluded languages get passed as lang: query",
Type: search.TextRequest,
Pattern: `lang:go -lang:typescript -lang:markdown`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `lang:Go -(lang:Typescript or lang:markdown)`,
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
},
{
Name: "language gets passed as both file include and lang: predicate",
Name: "Mixed file and lang filters",
Type: search.TextRequest,
Pattern: `file:\.go$ lang:go`,
Pattern: `file:\.go$ lang:go lang:typescript`,
Features: search.Features{
ContentBasedLangFilters: true,
},
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)" lang:Go`,
Query: `file:"\\.go(?m:$)" (lang:Go or lang:Typescript)`,
},
}
for _, tt := range cases {
Expand Down
Loading