Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
Search: add index shard concurrency to site config (#58514)
Browse files Browse the repository at this point in the history
In sourcegraph/zoekt#702, we updated indexserver to parse symbols in parallel
by spawning a new ctags process per shard. By default, indexing uses all
available CPUs to create shards in parallel, so now it will create many more
processes than before.

As a safeguard, we're exposing a site config setting to reduce the indexing
concurrency. It's not intended to be set by users, but will let us experiment
and make sure the defaults are solid.

As part of this change, I bumped the Zoekt dependency to pull in the change to
IndexOptions.
  • Loading branch information
jtibshirani authored Nov 27, 2023
1 parent 63a1eca commit 5a8bf40
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 6 deletions.
4 changes: 2 additions & 2 deletions deps.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6030,8 +6030,8 @@ def go_dependencies():
name = "com_github_sourcegraph_zoekt",
build_file_proto_mode = "disable_global",
importpath = "github.com/sourcegraph/zoekt",
sum = "h1:Pn9dVnAOZ7z8p+4BH32G1U9XtMCBVATy8vAq0ObHFdo=",
version = "v0.0.0-20231121165958-0959170c1623",
sum = "h1:2xAotLrNXGdj1x8I5yPh89qsesICseLEfEdKpmY3V90=",
version = "v0.0.0-20231122214222-d982320abe7b",
)

go_repository(
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ require (
github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql v0.0.0-20231121191755-214be625af21
github.com/sourcegraph/mountinfo v0.0.0-20231018142932-e00da332dac5
github.com/sourcegraph/sourcegraph/monitoring v0.0.0-20230124144931-b2d81b1accb6
github.com/sourcegraph/zoekt v0.0.0-20231121165958-0959170c1623
github.com/sourcegraph/zoekt v0.0.0-20231122214222-d982320abe7b
github.com/spf13/cobra v1.7.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1631,8 +1631,8 @@ github.com/sourcegraph/tiktoken-go v0.0.0-20230905173153-caab340cf008 h1:Wu8W50q
github.com/sourcegraph/tiktoken-go v0.0.0-20230905173153-caab340cf008/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152 h1:z/MpntplPaW6QW95pzcAR/72Z5TWDyDnSo0EOcyij9o=
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I=
github.com/sourcegraph/zoekt v0.0.0-20231121165958-0959170c1623 h1:Pn9dVnAOZ7z8p+4BH32G1U9XtMCBVATy8vAq0ObHFdo=
github.com/sourcegraph/zoekt v0.0.0-20231121165958-0959170c1623/go.mod h1:WVDDy51tFgeKy8zXtujTSbqzgyJrqhrLC9sjWiEfAII=
github.com/sourcegraph/zoekt v0.0.0-20231122214222-d982320abe7b h1:2xAotLrNXGdj1x8I5yPh89qsesICseLEfEdKpmY3V90=
github.com/sourcegraph/zoekt v0.0.0-20231122214222-d982320abe7b/go.mod h1:WVDDy51tFgeKy8zXtujTSbqzgyJrqhrLC9sjWiEfAII=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v0.0.0-20170901052352-ee1bd8ee15a1/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
Expand Down
5 changes: 5 additions & 0 deletions internal/search/backend/index_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ type ZoektIndexOptions struct {
Error string `json:",omitempty"`

LanguageMap map[string]ctags_config.ParserType

ShardConcurrency int32 `json:",omitempty"`
}

func (o *ZoektIndexOptions) FromProto(p *proto.ZoektIndexOptions) {
Expand Down Expand Up @@ -87,6 +89,7 @@ func (o *ZoektIndexOptions) FromProto(p *proto.ZoektIndexOptions) {
languageMap[entry.Language] = uint8(entry.Ctags.Number())
}
o.LanguageMap = languageMap
o.ShardConcurrency = p.GetShardConcurrency()
}

func (o *ZoektIndexOptions) ToProto() *proto.ZoektIndexOptions {
Expand Down Expand Up @@ -116,6 +119,7 @@ func (o *ZoektIndexOptions) ToProto() *proto.ZoektIndexOptions {
DocumentRanksVersion: o.DocumentRanksVersion,
Error: o.Error,
LanguageMap: languageMap,
ShardConcurrency: o.ShardConcurrency,
}
}

Expand Down Expand Up @@ -212,6 +216,7 @@ func getIndexOptions(

DocumentRanksVersion: opts.DocumentRanksVersion,
LanguageMap: ctags_config.CreateEngineMap(*c),
ShardConcurrency: int32(c.SearchIndexShardConcurrency),
}

// Set of branch names. Always index HEAD
Expand Down
21 changes: 21 additions & 0 deletions internal/search/backend/index_options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,24 @@ func TestGetIndexOptions_batch(t *testing.T) {
t.Fatal("mismatch (-want, +got):\n", diff)
}
}
// TestGetIndexOptions_concurrency verifies that the shard concurrency set in
// the site configuration is carried into the options returned for each repo.
func TestGetIndexOptions_concurrency(t *testing.T) {
	const wantConcurrency = 27

	// Version lookups always succeed and return a string built from the
	// branch name and the repo ID.
	repoOptionsFor := func(id api.RepoID) (*RepoIndexOptions, error) {
		versionOf := func(branch string) (string, error) {
			return fmt.Sprintf("!%s-%d", branch, id), nil
		}
		return &RepoIndexOptions{GetVersion: versionOf}, nil
	}

	// No search context revisions are reported for any repo.
	noContextRevs := func(api.RepoID) ([]string, error) { return nil, nil }

	siteConfig := &schema.SiteConfiguration{SearchIndexShardConcurrency: wantConcurrency}
	repoIDs := []api.RepoID{1, 2, 3}

	for _, opts := range GetIndexOptions(siteConfig, repoOptionsFor, noContextRevs, repoIDs...) {
		if got := int(opts.ShardConcurrency); got != wantConcurrency {
			t.Fatalf("wrong shard concurrency, want: %d, got: %d", wantConcurrency, got)
		}
	}
}
3 changes: 3 additions & 0 deletions schema/schema.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion schema/site.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
"group": "Search",
"examples": [true]
},
"search.index.shardConcurrency": {
"description": "The number of threads each indexserver should use to index shards. If not set, indexserver will use the number of available CPUs. This is exposed as a safeguard and should usually not require being set.",
"type": "integer",
"group": "Search",
"examples": ["10"]
},
"search.largeFiles": {
"description": "A list of file glob patterns where matching files will be indexed and searched regardless of their size. Files still need to be valid utf-8 to be indexed. The glob pattern syntax can be found here: https://github.com/bmatcuk/doublestar#patterns.",
"type": "array",
Expand All @@ -29,7 +35,7 @@
"description": "(debug) controls the amount of symbol search parallelism. Defaults to 20. It is not recommended to change this outside of debugging scenarios. This option will be removed in a future version.",
"type": "integer",
"group": "Debug",
"examples": [["20"]]
"examples": ["20"]
},
"cloneProgress.log": {
"description": "Whether clone progress should be logged to a file. If enabled, logs are written to files in the OS default path for temporary files.",
Expand Down

0 comments on commit 5a8bf40

Please sign in to comment.