Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
Structural search: fix precise lang filtering (#63791)
Browse files Browse the repository at this point in the history
The feature `search-content-based-lang-detection` was never implemented
for structural search. This PR fills in the missing pieces.

Fixes #61714
Relates to
https://linear.app/sourcegraph/project/make-precise-lang-filtering-default-4f06cfa28567

## Test plan

Two new unit tests checking that the lang filters are now correctly
passed to Zoekt.
  • Loading branch information
jtibshirani authored Jul 15, 2024
1 parent 660d686 commit cf05a9b
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 5 deletions.
18 changes: 13 additions & 5 deletions cmd/searcher/internal/search/zoekt_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
"github.com/sourcegraph/sourcegraph/lib/errors"
)

func handleFilePathPatterns(patternInfo *protocol.PatternInfo) (query.Q, error) {
func handleSearchFilters(patternInfo *protocol.PatternInfo) (query.Q, error) {
var and []query.Q

// Zoekt uses regular expressions for file paths.
Expand All @@ -42,10 +42,18 @@ func handleFilePathPatterns(patternInfo *protocol.PatternInfo) (query.Q, error)
and = append(and, &query.Not{Child: q})
}

for _, lang := range patternInfo.IncludeLangs {
and = append(and, &query.Language{Language: lang})
}

for _, lang := range patternInfo.ExcludeLangs {
and = append(and, &query.Not{Child: &query.Language{Language: lang}})
}

return query.NewAnd(and...), nil
}

func buildQuery(pattern string, branchRepos []query.BranchRepos, filePathPatterns query.Q, shortcircuit bool) (query.Q, error) {
func buildQuery(pattern string, branchRepos []query.BranchRepos, filterQuery query.Q, shortcircuit bool) (query.Q, error) {
regexString := comby.StructuralPatToRegexpQuery(pattern, shortcircuit)
if len(regexString) == 0 {
return &query.Const{Value: true}, nil
Expand All @@ -56,7 +64,7 @@ func buildQuery(pattern string, branchRepos []query.BranchRepos, filePathPattern
}
return query.NewAnd(
&query.BranchesRepos{List: branchRepos},
filePathPatterns,
filterQuery,
&query.Regexp{
Regexp: re,
CaseSensitive: true,
Expand Down Expand Up @@ -87,13 +95,13 @@ func zoektSearch(ctx context.Context, logger log.Logger, client zoekt.Streamer,
}).ToSearchOptions(ctx)
searchOpts.Whole = true

filePathPatterns, err := handleFilePathPatterns(args)
filterQuery, err := handleSearchFilters(args)
if err != nil {
return err
}

t0 := time.Now()
q, err := buildQuery(atom.Value, branchRepos, filePathPatterns, false)
q, err := buildQuery(atom.Value, branchRepos, filterQuery, false)
if err != nil {
return err
}
Expand Down
53 changes: 53 additions & 0 deletions cmd/searcher/internal/search/zoekt_search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,56 @@ func Test_zoektSearch(t *testing.T) {
)
require.Error(t, err)
}

func TestHandleSearchFilters(t *testing.T) {
tests := []struct {
name string
patternInfo *protocol.PatternInfo
expectedQ query.Q
expectedErr error
}{
{
name: "Include and exclude paths",
patternInfo: &protocol.PatternInfo{
IncludePaths: []string{"\\.go", "cmd"},
ExcludePaths: "vendor/",
IsCaseSensitive: false,
},
expectedQ: query.NewAnd(
&query.Substring{Pattern: ".go", FileName: true},
&query.Substring{Pattern: "cmd", FileName: true},
&query.Not{Child: &query.Substring{Pattern: "vendor/", FileName: true}},
),
expectedErr: nil,
},
{
name: "Include and exclude languages",
patternInfo: &protocol.PatternInfo{
IncludePaths: []string{"cmd"},
IncludeLangs: []string{"go", "typescript"},
ExcludeLangs: []string{"javascript"},
},
expectedQ: query.NewAnd(
&query.Substring{Pattern: "cmd", FileName: true},
&query.Language{Language: "go"},
&query.Language{Language: "typescript"},
&query.Not{Child: &query.Language{Language: "javascript"}},
),
expectedErr: nil,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
q, err := handleSearchFilters(tt.patternInfo)

if tt.expectedErr != nil {
require.Error(t, err)
require.Equal(t, tt.expectedErr.Error(), err.Error())
} else {
require.NoError(t, err)
require.Equal(t, tt.expectedQ, q)
}
})
}
}
4 changes: 4 additions & 0 deletions internal/search/job/jobutil/job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,10 @@ func TestToTextPatternInfo(t *testing.T) {
}, {
input: `repo:^github\.com/sgtest/sourcegraph-typescript$ file:^README\.md "basic :[_] access :[_]" patterntype:structural`,
output: autogold.Expect(`{"Query":{"Value":"\"basic :[_] access :[_]\"","IsNegated":false,"IsRegExp":false},"IsStructuralPat":true,"CombyRule":"","IsCaseSensitive":false,"FileMatchLimit":30,"Index":"yes","Select":[],"IncludePaths":["^README\\.md"],"ExcludePaths":"","IncludeLangs":null,"ExcludeLangs":null,"PathPatternsAreCaseSensitive":false,"PatternMatchesContent":true,"PatternMatchesPath":true,"Languages":null}`),
}, {
input: `repo:^github\.com/sgtest/sourcegraph-typescript$ lang:typescript "basic :[_] access :[_]" patterntype:structural`,
feat: search.Features{ContentBasedLangFilters: true},
output: autogold.Expect(`{"Query":{"Value":"\"basic :[_] access :[_]\"","IsNegated":false,"IsRegExp":false},"IsStructuralPat":true,"CombyRule":"","IsCaseSensitive":false,"FileMatchLimit":30,"Index":"yes","Select":[],"IncludePaths":null,"ExcludePaths":"","IncludeLangs":["TypeScript"],"ExcludeLangs":null,"PathPatternsAreCaseSensitive":false,"PatternMatchesContent":true,"PatternMatchesPath":true,"Languages":["typescript"]}`),
}, {
input: `sgtest lang:magik type:file`,
feat: search.Features{ContentBasedLangFilters: true},
Expand Down

0 comments on commit cf05a9b

Please sign in to comment.