From 85d2cafd91ab734a3a232dffeb5c43a7f27dd268 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Mon, 20 Nov 2023 10:46:48 -0800 Subject: [PATCH] Create a new parser for every shard --- build/builder.go | 7 ++----- build/ctags.go | 14 +++++++++++--- build/ctags_test.go | 11 +++++++++-- ctags/json.go | 15 --------------- ctags/json_test.go | 7 ++++++- ctags/parser_map.go | 46 ++++++++++++++++++++++++++++++++------------- 6 files changed, 61 insertions(+), 39 deletions(-) diff --git a/build/builder.go b/build/builder.go index fa666821..1838b777 100644 --- a/build/builder.go +++ b/build/builder.go @@ -564,14 +564,11 @@ func NewBuilder(opts Options) (*Builder, error) { } parserFactory, err := ctags.NewParserFactory( - ctags.ParserBinMap{ - ctags.UniversalCTags: b.opts.CTagsPath, - ctags.ScipCTags: b.opts.ScipCTagsPath, - }, + b.opts.CTagsPath, + b.opts.ScipCTagsPath, opts.LanguageMap, b.opts.CTagsMustSucceed, ) - if err != nil { return nil, err } diff --git a/build/ctags.go b/build/ctags.go index 074ba53a..48a65577 100644 --- a/build/ctags.go +++ b/build/ctags.go @@ -48,6 +48,8 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserF var tagsToSections tagsToSections + parsers := make(map[ctags.CTagsParserType]ctags.Parser) + for _, doc := range todo { if len(doc.Content) == 0 || doc.Symbols != nil { continue @@ -65,10 +67,16 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserF parserKind = ctags.UniversalCTags } - parser := parserFactory[parserKind] + parser := parsers[parserKind] if parser == nil { - // this happens if CTagsMustSucceed is false and we didn't find the binary - continue + // Spin up a new parser for this parser kind + parser = parserFactory.NewParser(parserKind) + if parser == nil { + // this happens if CTagsMustSucceed is false and we didn't find the binary + continue + } + parsers[parserKind] = parser + defer parser.Close() } monitor.BeginParsing(doc) diff --git a/build/ctags_test.go 
b/build/ctags_test.go index ea9b3d8f..c694d433 100644 --- a/build/ctags_test.go +++ b/build/ctags_test.go @@ -257,15 +257,22 @@ func BenchmarkTagsToSections(b *testing.B) { requireCTags(b) file, err := os.ReadFile("./testdata/large_file.cc") - parser := ctags.NewParser("universal-ctags") + if err != nil { + b.Fatal(err) + } - var tagsToSections tagsToSections + factory, err := ctags.NewParserFactory("universal-ctags", "", ctags.LanguageMap{}, true) + if err != nil { + b.Fatal(err) + } + parser := factory.NewParser(ctags.UniversalCTags) entries, err := parser.Parse("./testdata/large_file.cc", file) if err != nil { b.Fatal(err) } + var tagsToSections tagsToSections secs, _, err := tagsToSections.Convert(file, entries) if err != nil { b.Fatal(err) diff --git a/ctags/json.go b/ctags/json.go index 41ca0930..a08c92ea 100644 --- a/ctags/json.go +++ b/ctags/json.go @@ -16,8 +16,6 @@ package ctags import ( "fmt" - "log" - "os" "sync" "time" @@ -115,16 +113,3 @@ func (lp *lockedParser) close() { lp.send = nil lp.recv = nil } - -// NewParser creates a parser that is implemented by the given -// universal-ctags binary. The parser is safe for concurrent use. 
-func NewParser(bin string) Parser { - opts := goctags.Options{ - Bin: bin, - } - if debug { - opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) - opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) - } - return &lockedParser{opts: opts,} -} diff --git a/ctags/json_test.go b/ctags/json_test.go index 45690c24..21dabac7 100644 --- a/ctags/json_test.go +++ b/ctags/json_test.go @@ -27,7 +27,12 @@ func TestJSON(t *testing.T) { t.Skip(err) } - p := NewParser("universal-ctags") + factory, err := NewParserFactory("universal-ctags", "", LanguageMap{}, true) + if err != nil { + t.Fatal(err) + } + + p := factory.NewParser(UniversalCTags) defer p.Close() java := ` diff --git a/ctags/parser_map.go b/ctags/parser_map.go index 73f235bd..ac56d27b 100644 --- a/ctags/parser_map.go +++ b/ctags/parser_map.go @@ -17,8 +17,12 @@ package ctags import ( "bytes" "fmt" + "log" + "os" "os/exec" "strings" + + goctags "github.com/sourcegraph/go-ctags" ) type CTagsParserType uint8 @@ -60,34 +64,34 @@ func StringToParser(str string) CTagsParserType { } } -type ParserFactory map[CTagsParserType]Parser -type ParserBinMap map[CTagsParserType]string - -func NewParserFactory(bins ParserBinMap, languageMap LanguageMap, cTagsMustSucceed bool) (ParserFactory, error) { - parsers := make(ParserFactory) +type ParserFactory map[CTagsParserType]string - requiredTypes := []CTagsParserType{UniversalCTags} +func NewParserFactory( + ctagsPath string, + scipCTagsPath string, + languageMap LanguageMap, + cTagsMustSucceed bool, +) (ParserFactory, error) { + validBins := make(map[CTagsParserType]string) + requiredBins := map[CTagsParserType]string{UniversalCTags: ctagsPath} for _, parserType := range languageMap { if parserType == ScipCTags { - requiredTypes = append(requiredTypes, ScipCTags) + requiredBins[ScipCTags] = scipCTagsPath break } } - for _, parserType := range requiredTypes { - bin := bins[parserType] + for parserType, bin := range requiredBins { if bin == "" && cTagsMustSucceed { 
return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType)) } - if err := checkBinary(parserType, bin); err != nil && cTagsMustSucceed { return nil, fmt.Errorf("ctags.NewParserFactory: %v", err) } - - parsers[parserType] = NewParser(bin) + validBins[parserType] = bin } - return parsers, nil + return validBins, nil } // checkBinary does checks on bin to ensure we can correctly use the binary @@ -111,3 +115,19 @@ func checkBinary(typ CTagsParserType, bin string) error { return nil } + +// NewParser creates a parser backed by the ctags binary registered for typ, +// or nil if no binary is registered. The parser is safe for concurrent use. +func (p ParserFactory) NewParser(typ CTagsParserType) Parser { + bin := p[typ] + if bin == "" { + return nil + } + + opts := goctags.Options{Bin: bin} + if debug { + opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) + opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) + } + return &lockedParser{opts: opts} +}