diff --git a/build/builder.go b/build/builder.go index 82aba3bef..1838b777a 100644 --- a/build/builder.go +++ b/build/builder.go @@ -249,8 +249,8 @@ type Builder struct { docChecker zoekt.DocChecker size int - parserMap ctags.ParserMap - building sync.WaitGroup + parserFactory ctags.ParserFactory + building sync.WaitGroup errMu sync.Mutex buildError error @@ -563,20 +563,17 @@ func NewBuilder(opts Options) (*Builder, error) { finishedShards: map[string]string{}, } - parserMap, err := ctags.NewParserMap( - ctags.ParserBinMap{ - ctags.UniversalCTags: b.opts.CTagsPath, - ctags.ScipCTags: b.opts.ScipCTagsPath, - }, + parserFactory, err := ctags.NewParserFactory( + b.opts.CTagsPath, + b.opts.ScipCTagsPath, opts.LanguageMap, b.opts.CTagsMustSucceed, ) - if err != nil { return nil, err } - b.parserMap = parserMap + b.parserFactory = parserFactory b.shardLogger = &lumberjack.Logger{ Filename: filepath.Join(opts.IndexDir, "zoekt-builder-shard-log.tsv"), @@ -1021,7 +1018,7 @@ func sortDocuments(todo []*zoekt.Document) { func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) { if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") { - err := ctagsAddSymbolsParserMap(todo, b.opts.LanguageMap, b.parserMap) + err := parseSymbols(todo, b.opts.LanguageMap, b.parserFactory) if b.opts.CTagsMustSucceed && err != nil { return nil, err } diff --git a/build/ctags.go b/build/ctags.go index 7308371f0..48a655776 100644 --- a/build/ctags.go +++ b/build/ctags.go @@ -42,12 +42,14 @@ func normalizeLanguage(filetype string) string { return normalized } -func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error { +func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserFactory ctags.ParserFactory) error { monitor := newMonitor() defer monitor.Stop() var tagsToSections tagsToSections + parsers := make(map[ctags.CTagsParserType]ctags.Parser) + for _, doc := range todo { if len(doc.Content) == 0 || doc.Symbols != nil { continue @@ -65,10 +67,16 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language parserKind = ctags.UniversalCTags } - parser := parserMap[parserKind] + parser := parsers[parserKind] if parser == nil { - // this happens if CTagsMustSucceed is false and we didn't find the binary - continue + // Spin up a new parser for this parser kind + parser = parserFactory.NewParser(parserKind) + if parser == nil { + // this happens if CTagsMustSucceed is false and we didn't find the binary + continue + } + parsers[parserKind] = parser + defer parser.Close() } monitor.BeginParsing(doc) diff --git a/build/ctags_test.go b/build/ctags_test.go index b052eecca..c694d433b 100644 --- a/build/ctags_test.go +++ b/build/ctags_test.go @@ -257,18 +257,22 @@ func BenchmarkTagsToSections(b *testing.B) { requireCTags(b) file, err := os.ReadFile("./testdata/large_file.cc") - parser, err := ctags.NewParser(ctags.UniversalCTags, "universal-ctags") if err != nil { b.Fatal(err) } - var tagsToSections tagsToSections + factory, err := ctags.NewParserFactory("universal-ctags", "", ctags.LanguageMap{}, true) + if err != nil { + b.Fatal(err) + } + parser := factory.NewParser(ctags.UniversalCTags) entries, err := parser.Parse("./testdata/large_file.cc", file) if err != nil { b.Fatal(err) } + var tagsToSections tagsToSections secs, _, err := tagsToSections.Convert(file, entries) if err != nil { b.Fatal(err) diff --git a/ctags/json.go b/ctags/json.go index bdff60d69..a08c92eae 100644 --- a/ctags/json.go +++ b/ctags/json.go @@ -15,12 +15,7 @@ package ctags import ( - "bytes" "fmt" - "log" - "os" - "os/exec" - "strings" "sync" "time" @@ -118,44 +113,3 @@ func (lp *lockedParser) close() { lp.send = nil lp.recv = nil } - -// NewParser creates a parser that is implemented by the given -// universal-ctags binary. The parser is safe for concurrent use. -func NewParser(parserType CTagsParserType, bin string) (Parser, error) { - if err := checkBinary(parserType, bin); err != nil { - return nil, err - } - - opts := goctags.Options{ - Bin: bin, - } - if debug { - opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) - opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) - } - return &lockedParser{ - opts: opts, - }, nil -} - -// checkBinary does checks on bin to ensure we can correctly use the binary -// for symbols. It is more user friendly to fail early in this case. -func checkBinary(typ CTagsParserType, bin string) error { - switch typ { - case UniversalCTags: - helpOutput, err := exec.Command(bin, "--help").CombinedOutput() - if err != nil { - return fmt.Errorf("failed to check if %s is universal-ctags: %w\n--help output:\n%s", bin, err, string(helpOutput)) - } - if !bytes.Contains(helpOutput, []byte("+interactive")) { - return fmt.Errorf("ctags binary is not universal-ctags or is not compiled with +interactive feature: bin=%s", bin) - } - - case ScipCTags: - if !strings.Contains(bin, "scip-ctags") { - return fmt.Errorf("only supports scip-ctags, not %s", bin) - } - } - - return nil -} diff --git a/ctags/json_test.go b/ctags/json_test.go index 5a6304f43..21dabac78 100644 --- a/ctags/json_test.go +++ b/ctags/json_test.go @@ -27,11 +27,12 @@ func TestJSON(t *testing.T) { t.Skip(err) } - p, err := NewParser(UniversalCTags, "universal-ctags") + factory, err := NewParserFactory("universal-ctags", "", LanguageMap{}, true) if err != nil { - t.Fatal("newProcess", err) + t.Fatal(err) } + p := factory.NewParser(UniversalCTags) defer p.Close() java := ` diff --git a/ctags/parser_factory.go b/ctags/parser_factory.go new file mode 100644 index 000000000..ac56d27b3 --- /dev/null +++ b/ctags/parser_factory.go @@ -0,0 +1,133 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ctags + +import ( + "bytes" + "fmt" + "log" + "os" + "os/exec" + "strings" + + goctags "github.com/sourcegraph/go-ctags" +) + +type CTagsParserType uint8 + +const ( + UnknownCTags CTagsParserType = iota + NoCTags + UniversalCTags + ScipCTags +) + +type LanguageMap = map[string]CTagsParserType + +func ParserToString(parser CTagsParserType) string { + switch parser { + case UnknownCTags: + return "unknown" + case NoCTags: + return "no" + case UniversalCTags: + return "universal" + case ScipCTags: + return "scip" + default: + panic("Reached impossible CTagsParserType state") + } +} + +func StringToParser(str string) CTagsParserType { + switch str { + case "no": + return NoCTags + case "universal": + return UniversalCTags + case "scip": + return ScipCTags + default: + return UniversalCTags + } +} + +type ParserFactory map[CTagsParserType]string + +func NewParserFactory( + ctagsPath string, + scipCTagsPath string, + languageMap LanguageMap, + cTagsMustSucceed bool, +) (ParserFactory, error) { + validBins := make(map[CTagsParserType]string) + requiredBins := map[CTagsParserType]string{UniversalCTags: ctagsPath} + for _, parserType := range languageMap { + if parserType == ScipCTags { + requiredBins[ScipCTags] = scipCTagsPath + break + } + } + + for parserType, bin := range requiredBins { + if bin == "" && cTagsMustSucceed { + return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType)) + } + if err := checkBinary(parserType, bin); err != nil && cTagsMustSucceed { + return nil, fmt.Errorf("ctags.NewParserFactory: %v", err) + } + validBins[parserType] = bin + } + + return validBins, nil +} + +// checkBinary does checks on bin to ensure we can correctly use the binary +// for symbols. It is more user friendly to fail early in this case. +func checkBinary(typ CTagsParserType, bin string) error { + switch typ { + case UniversalCTags: + helpOutput, err := exec.Command(bin, "--help").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to check if %s is universal-ctags: %w\n--help output:\n%s", bin, err, string(helpOutput)) + } + if !bytes.Contains(helpOutput, []byte("+interactive")) { + return fmt.Errorf("ctags binary is not universal-ctags or is not compiled with +interactive feature: bin=%s", bin) + } + + case ScipCTags: + if !strings.Contains(bin, "scip-ctags") { + return fmt.Errorf("only supports scip-ctags, not %s", bin) + } + } + + return nil +} + +// NewParser creates a parser that is implemented by the given +// ctags binary. The parser is safe for concurrent use. +func (p ParserFactory) NewParser(typ CTagsParserType) Parser { + bin := p[typ] + if bin == "" { + return nil + } + + opts := goctags.Options{Bin: bin} + if debug { + opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) + opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) + } + return &lockedParser{opts: opts} +} diff --git a/ctags/parser_map.go b/ctags/parser_map.go deleted file mode 100644 index c30fa9013..000000000 --- a/ctags/parser_map.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2017 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ctags - -import ( - "fmt" -) - -type CTagsParserType uint8 - -const ( - UnknownCTags CTagsParserType = iota - NoCTags - UniversalCTags - ScipCTags -) - -type LanguageMap = map[string]CTagsParserType - -func ParserToString(parser CTagsParserType) string { - switch parser { - case UnknownCTags: - return "unknown" - case NoCTags: - return "no" - case UniversalCTags: - return "universal" - case ScipCTags: - return "scip" - default: - panic("Reached impossible CTagsParserType state") - } -} - -func StringToParser(str string) CTagsParserType { - switch str { - case "no": - return NoCTags - case "universal": - return UniversalCTags - case "scip": - return ScipCTags - default: - return UniversalCTags - } -} - -type ParserMap map[CTagsParserType]Parser -type ParserBinMap map[CTagsParserType]string - -func NewParserMap(bins ParserBinMap, languageMap LanguageMap, cTagsMustSucceed bool) (ParserMap, error) { - parsers := make(ParserMap) - - requiredTypes := []CTagsParserType{UniversalCTags} - for _, parserType := range languageMap { - if parserType == ScipCTags { - requiredTypes = append(requiredTypes, ScipCTags) - break - } - } - - for _, parserType := range requiredTypes { - bin := bins[parserType] - if bin == "" && cTagsMustSucceed { - return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType)) - } else { - parser, err := NewParser(parserType, bin) - if err != nil && cTagsMustSucceed { - return nil, fmt.Errorf("ctags.NewParserMap: %v", err) - } - - parsers[parserType] = parser - } - } - - return parsers, nil -}