From 7b678e15c3489c7d7dea7ad709dc2eed46c5a1f5 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 6 Dec 2023 08:53:18 -0800 Subject: [PATCH] Indexing: clean up ctags parser wrapper (#708) This change cleans up the Go ctags parser wrapper as a follow-up to #702. Specific changes: * Remove synchronization in `lockedParser` and rename it to `CTagsParser` * Push delegation to universal vs. SCIP ctags into parser wrapper * Simplify document timeout logic * Rename some files --- build/builder.go | 12 +- build/ctags.go | 29 ++--- build/ctags_test.go | 6 +- ctags/json.go | 115 -------------------- ctags/parser.go | 99 +++++++++++++++++ ctags/{parser_factory.go => parser_bins.go} | 29 +---- ctags/{json_test.go => parser_test.go} | 9 +- 7 files changed, 125 insertions(+), 174 deletions(-) delete mode 100644 ctags/json.go create mode 100644 ctags/parser.go rename ctags/{parser_factory.go => parser_bins.go} (80%) rename ctags/{json_test.go => parser_test.go} (92%) diff --git a/build/builder.go b/build/builder.go index de6e1508d..cc410aa4a 100644 --- a/build/builder.go +++ b/build/builder.go @@ -247,9 +247,9 @@ type Builder struct { todo []*zoekt.Document docChecker zoekt.DocChecker size int - - parserFactory ctags.ParserFactory - building sync.WaitGroup + + parserBins ctags.ParserBinMap + building sync.WaitGroup errMu sync.Mutex buildError error @@ -560,7 +560,7 @@ func NewBuilder(opts Options) (*Builder, error) { finishedShards: map[string]string{}, } - parserFactory, err := ctags.NewParserFactory( + parserBins, err := ctags.NewParserBinMap( b.opts.CTagsPath, b.opts.ScipCTagsPath, opts.LanguageMap, @@ -570,7 +570,7 @@ func NewBuilder(opts Options) (*Builder, error) { return nil, err } - b.parserFactory = parserFactory + b.parserBins = parserBins if opts.IsDelta { // Delta shards build on top of previously existing shards. @@ -994,7 +994,7 @@ func sortDocuments(todo []*zoekt.Document) { func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) { if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") { - err := parseSymbols(todo, b.opts.LanguageMap, b.parserFactory) + err := parseSymbols(todo, b.opts.LanguageMap, b.parserBins) if b.opts.CTagsMustSucceed && err != nil { return nil, err } diff --git a/build/ctags.go b/build/ctags.go index 48a655776..ff0c91cd7 100644 --- a/build/ctags.go +++ b/build/ctags.go @@ -42,13 +42,14 @@ func normalizeLanguage(filetype string) string { return normalized } -func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserFactory ctags.ParserFactory) error { +func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { monitor := newMonitor() defer monitor.Stop() var tagsToSections tagsToSections - parsers := make(map[ctags.CTagsParserType]ctags.Parser) + parser := ctags.NewCTagsParser(parserBins) + defer parser.Close() for _, doc := range todo { if len(doc.Content) == 0 || doc.Symbols != nil { @@ -57,30 +58,18 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserF zoekt.DetermineLanguageIfUnknown(doc) - parserKind := languageMap[normalizeLanguage(doc.Language)] - if parserKind == ctags.NoCTags { + parserType := languageMap[normalizeLanguage(doc.Language)] + if parserType == ctags.NoCTags { continue } - // If the parser kind is unknown, default to universal-ctags - if parserKind == ctags.UnknownCTags { - parserKind = ctags.UniversalCTags - } - - parser := parsers[parserKind] - if parser == nil { - // Spin up a new parser for this parser kind - parser = parserFactory.NewParser(parserKind) - if parser == nil { - // this happens if CTagsMustSucceed is false and we didn't find the binary - continue - } - parsers[parserKind] = parser - defer parser.Close() + // If the parser type is unknown, default to universal-ctags + if parserType == ctags.UnknownCTags { + parserType = ctags.UniversalCTags } monitor.BeginParsing(doc) - es, err := parser.Parse(doc.Name, doc.Content) + es, err := parser.Parse(doc.Name, doc.Content, parserType) monitor.EndParsing(es) if err != nil { diff --git a/build/ctags_test.go b/build/ctags_test.go index c694d433b..185322982 100644 --- a/build/ctags_test.go +++ b/build/ctags_test.go @@ -261,13 +261,13 @@ func BenchmarkTagsToSections(b *testing.B) { b.Fatal(err) } - factory, err := ctags.NewParserFactory("universal-ctags", "", ctags.LanguageMap{}, true) + bins, err := ctags.NewParserBinMap("universal-ctags", "", ctags.LanguageMap{}, true) if err != nil { b.Fatal(err) } - parser := factory.NewParser(ctags.UniversalCTags) - entries, err := parser.Parse("./testdata/large_file.cc", file) + parser := ctags.NewCTagsParser(bins) + entries, err := parser.Parse("./testdata/large_file.cc", file, ctags.UniversalCTags) if err != nil { b.Fatal(err) } diff --git a/ctags/json.go b/ctags/json.go deleted file mode 100644 index a08c92eae..000000000 --- a/ctags/json.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2017 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ctags - -import ( - "fmt" - "sync" - "time" - - goctags "github.com/sourcegraph/go-ctags" -) - -const debug = false - -type Parser = goctags.Parser -type Entry = goctags.Entry - -type parseReq struct { - Name string - Content []byte -} - -type parseResp struct { - Entries []*Entry - Err error -} - -type lockedParser struct { - mu sync.Mutex - opts goctags.Options - p Parser - send chan<- parseReq - recv <-chan parseResp -} - -// parseTimeout is how long we wait for a response for parsing a single file -// in ctags. 1 minute is a very conservative timeout which we should only hit -// if ctags hangs. -const parseTimeout = time.Minute - -// Parse wraps go-ctags Parse. It lazily starts the process and adds a timeout -// around parse requests. Additionally it serializes access to the parsing -// process. The timeout is important since we occasionally come across -// documents which hang universal-ctags. -func (lp *lockedParser) Parse(name string, content []byte) ([]*Entry, error) { - lp.mu.Lock() - defer lp.mu.Unlock() - - if lp.p == nil { - p, err := goctags.New(lp.opts) - if err != nil { - return nil, err - } - send := make(chan parseReq) - // buf of 1 so we avoid blocking sends in the parser if we exit early. - recv := make(chan parseResp, 1) - - go func() { - defer close(recv) - for req := range send { - entries, err := p.Parse(req.Name, req.Content) - recv <- parseResp{Entries: entries, Err: err} - } - }() - - lp.p = p - lp.send = send - lp.recv = recv - } - - lp.send <- parseReq{Name: name, Content: content} - - deadline := time.NewTimer(parseTimeout) - defer deadline.Stop() - - select { - case resp := <-lp.recv: - return resp.Entries, resp.Err - case <-deadline.C: - // Error out since ctags hanging is a sign something bad is happening. - lp.close() - return nil, fmt.Errorf("ctags timedout after %s parsing %s", parseTimeout, name) - } -} - -func (lp *lockedParser) Close() { - lp.mu.Lock() - defer lp.mu.Unlock() - lp.close() -} - -// close assumes lp.mu is held. -func (lp *lockedParser) close() { - if lp.p == nil { - return - } - - lp.p.Close() - lp.p = nil - close(lp.send) - lp.send = nil - lp.recv = nil -} diff --git a/ctags/parser.go b/ctags/parser.go new file mode 100644 index 000000000..84535fee6 --- /dev/null +++ b/ctags/parser.go @@ -0,0 +1,99 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ctags + +import ( + "fmt" + "log" + "os" + "time" + + goctags "github.com/sourcegraph/go-ctags" +) + +type Entry = goctags.Entry + +// CTagsParser wraps go-ctags and delegates to the right process (like universal-ctags or scip-ctags). +// It is only safe for single-threaded use. This wrapper also enforces a timeout on parsing a single +// document, which is important since documents can occasionally hang universal-ctags. +// documents which hang universal-ctags. +type CTagsParser struct { + bins ParserBinMap + parsers map[CTagsParserType]goctags.Parser +} + +// parseTimeout is how long we wait for a response for parsing a single file +// in ctags. 1 minute is a very conservative timeout which we should only hit +// if ctags hangs. +const parseTimeout = time.Minute + +func NewCTagsParser(bins ParserBinMap) CTagsParser { + return CTagsParser{bins: bins, parsers: make(map[CTagsParserType]goctags.Parser)} +} + +type parseResult struct { + entries []*Entry + err error +} + +func (lp *CTagsParser) Parse(name string, content []byte, typ CTagsParserType) ([]*Entry, error) { + if lp.parsers[typ] == nil { + parser, err := lp.newParserProcess(typ) + if parser == nil || err != nil { + return nil, err + } + lp.parsers[typ] = parser + } + + deadline := time.NewTimer(parseTimeout) + defer deadline.Stop() + + parser := lp.parsers[typ] + recv := make(chan parseResult, 1) + go func() { + entry, err := parser.Parse(name, content) + recv <- parseResult{entries: entry, err: err} + }() + + select { + case resp := <-recv: + return resp.entries, resp.err + case <-deadline.C: + // Error out since ctags hanging is a sign something bad is happening. + return nil, fmt.Errorf("ctags timedout after %s parsing %s", parseTimeout, name) + } +} + +func (lp *CTagsParser) newParserProcess(typ CTagsParserType) (goctags.Parser, error) { + bin := lp.bins[typ] + if bin == "" { + // This happens if CTagsMustSucceed is false and we didn't find the binary + return nil, nil + } + + opts := goctags.Options{Bin: bin} + parserType := ParserToString(typ) + if debug { + opts.Info = log.New(os.Stderr, "CTAGS (" + parserType + ") INF: ", log.LstdFlags) + opts.Debug = log.New(os.Stderr, "CTAGS (" + parserType + ") DBG: ", log.LstdFlags) + } + return goctags.New(opts) +} + +func (lp *CTagsParser) Close() { + for _, parser := range lp.parsers { + parser.Close() + } +} diff --git a/ctags/parser_factory.go b/ctags/parser_bins.go similarity index 80% rename from ctags/parser_factory.go rename to ctags/parser_bins.go index ac56d27b3..bf378631b 100644 --- a/ctags/parser_factory.go +++ b/ctags/parser_bins.go @@ -17,12 +17,8 @@ package ctags import ( "bytes" "fmt" - "log" - "os" "os/exec" "strings" - - goctags "github.com/sourcegraph/go-ctags" ) type CTagsParserType uint8 @@ -34,6 +30,8 @@ const ( ScipCTags ) +const debug = false + type LanguageMap = map[string]CTagsParserType func ParserToString(parser CTagsParserType) string { @@ -64,14 +62,14 @@ func StringToParser(str string) CTagsParserType { } } -type ParserFactory map[CTagsParserType]string +type ParserBinMap map[CTagsParserType]string -func NewParserFactory( +func NewParserBinMap( ctagsPath string, scipCTagsPath string, languageMap LanguageMap, cTagsMustSucceed bool, -) (ParserFactory, error) { +) (ParserBinMap, error) { validBins := make(map[CTagsParserType]string) requiredBins := map[CTagsParserType]string{UniversalCTags: ctagsPath} for _, parserType := range languageMap { @@ -86,7 +84,7 @@ func NewParserFactory( return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType)) } if err := checkBinary(parserType, bin); err != nil && cTagsMustSucceed { - return nil, fmt.Errorf("ctags.NewParserFactory: %v", err) + return nil, fmt.Errorf("ctags.NewParserBinMap: %v", err) } validBins[parserType] = bin } @@ -116,18 +114,3 @@ func checkBinary(typ CTagsParserType, bin string) error { return nil } -// NewParser creates a parser that is implemented by the given -// ctags binary. The parser is safe for concurrent use. -func (p ParserFactory) NewParser(typ CTagsParserType) Parser { - bin := p[typ] - if bin == "" { - return nil - } - - opts := goctags.Options{Bin: bin} - if debug { - opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) - opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) - } - return &lockedParser{opts: opts} -} diff --git a/ctags/json_test.go b/ctags/parser_test.go similarity index 92% rename from ctags/json_test.go rename to ctags/parser_test.go index 21dabac78..95c4c44e3 100644 --- a/ctags/json_test.go +++ b/ctags/parser_test.go @@ -27,12 +27,7 @@ func TestJSON(t *testing.T) { t.Skip(err) } - factory, err := NewParserFactory("universal-ctags", "", LanguageMap{}, true) - if err != nil { - t.Fatal(err) - } - - p := factory.NewParser(UniversalCTags) + p := NewCTagsParser(map[CTagsParserType]string{UniversalCTags: "universal-ctags"}) defer p.Close() java := ` @@ -50,7 +45,7 @@ class Back implements Future extends Frob { } ` name := "io/zoekt/Back.java" - got, err := p.Parse(name, []byte(java)) + got, err := p.Parse(name, []byte(java), UniversalCTags) if err != nil { t.Errorf("Process: %v", err) }