Skip to content

Commit

Permalink
Indexing: clean up ctags parser wrapper (#708)
Browse files Browse the repository at this point in the history
This change cleans up the Go ctags parser wrapper as a follow-up to #702. Specific changes:
* Remove synchronization in `lockedParser` and rename it to `CTagsParser` 
* Push delegation to universal vs. SCIP ctags into parser wrapper
* Simplify document timeout logic
* Rename some files
  • Loading branch information
jtibshirani authored Dec 6, 2023
1 parent 51fc9db commit 7b678e1
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 174 deletions.
12 changes: 6 additions & 6 deletions build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,9 @@ type Builder struct {
todo []*zoekt.Document
docChecker zoekt.DocChecker
size int

parserFactory ctags.ParserFactory
building sync.WaitGroup
parserBins ctags.ParserBinMap
building sync.WaitGroup

errMu sync.Mutex
buildError error
Expand Down Expand Up @@ -560,7 +560,7 @@ func NewBuilder(opts Options) (*Builder, error) {
finishedShards: map[string]string{},
}

parserFactory, err := ctags.NewParserFactory(
parserBins, err := ctags.NewParserBinMap(
b.opts.CTagsPath,
b.opts.ScipCTagsPath,
opts.LanguageMap,
Expand All @@ -570,7 +570,7 @@ func NewBuilder(opts Options) (*Builder, error) {
return nil, err
}

b.parserFactory = parserFactory
b.parserBins = parserBins

if opts.IsDelta {
// Delta shards build on top of previously existing shards.
Expand Down Expand Up @@ -994,7 +994,7 @@ func sortDocuments(todo []*zoekt.Document) {

func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) {
if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") {
err := parseSymbols(todo, b.opts.LanguageMap, b.parserFactory)
err := parseSymbols(todo, b.opts.LanguageMap, b.parserBins)
if b.opts.CTagsMustSucceed && err != nil {
return nil, err
}
Expand Down
29 changes: 9 additions & 20 deletions build/ctags.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ func normalizeLanguage(filetype string) string {
return normalized
}

func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserFactory ctags.ParserFactory) error {
func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error {
monitor := newMonitor()
defer monitor.Stop()

var tagsToSections tagsToSections

parsers := make(map[ctags.CTagsParserType]ctags.Parser)
parser := ctags.NewCTagsParser(parserBins)
defer parser.Close()

for _, doc := range todo {
if len(doc.Content) == 0 || doc.Symbols != nil {
Expand All @@ -57,30 +58,18 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserF

zoekt.DetermineLanguageIfUnknown(doc)

parserKind := languageMap[normalizeLanguage(doc.Language)]
if parserKind == ctags.NoCTags {
parserType := languageMap[normalizeLanguage(doc.Language)]
if parserType == ctags.NoCTags {
continue
}

// If the parser kind is unknown, default to universal-ctags
if parserKind == ctags.UnknownCTags {
parserKind = ctags.UniversalCTags
}

parser := parsers[parserKind]
if parser == nil {
// Spin up a new parser for this parser kind
parser = parserFactory.NewParser(parserKind)
if parser == nil {
// this happens if CTagsMustSucceed is false and we didn't find the binary
continue
}
parsers[parserKind] = parser
defer parser.Close()
// If the parser type is unknown, default to universal-ctags
if parserType == ctags.UnknownCTags {
parserType = ctags.UniversalCTags
}

monitor.BeginParsing(doc)
es, err := parser.Parse(doc.Name, doc.Content)
es, err := parser.Parse(doc.Name, doc.Content, parserType)
monitor.EndParsing(es)

if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions build/ctags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,13 @@ func BenchmarkTagsToSections(b *testing.B) {
b.Fatal(err)
}

factory, err := ctags.NewParserFactory("universal-ctags", "", ctags.LanguageMap{}, true)
bins, err := ctags.NewParserBinMap("universal-ctags", "", ctags.LanguageMap{}, true)
if err != nil {
b.Fatal(err)
}

parser := factory.NewParser(ctags.UniversalCTags)
entries, err := parser.Parse("./testdata/large_file.cc", file)
parser := ctags.NewCTagsParser(bins)
entries, err := parser.Parse("./testdata/large_file.cc", file, ctags.UniversalCTags)
if err != nil {
b.Fatal(err)
}
Expand Down
115 changes: 0 additions & 115 deletions ctags/json.go

This file was deleted.

99 changes: 99 additions & 0 deletions ctags/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ctags

import (
"fmt"
"log"
"os"
"time"

goctags "github.com/sourcegraph/go-ctags"
)

type Entry = goctags.Entry

// CTagsParser wraps go-ctags and delegates to the right process (like universal-ctags or scip-ctags).
// It is only safe for single-threaded use. This wrapper also enforces a timeout on parsing a single
// document, which is important since documents can occasionally hang universal-ctags.
// documents which hang universal-ctags.
type CTagsParser struct {
bins ParserBinMap
parsers map[CTagsParserType]goctags.Parser
}

// parseTimeout is how long we wait for a response for parsing a single file
// in ctags. 1 minute is a very conservative timeout which we should only hit
// if ctags hangs.
const parseTimeout = time.Minute

func NewCTagsParser(bins ParserBinMap) CTagsParser {
return CTagsParser{bins: bins, parsers: make(map[CTagsParserType]goctags.Parser)}
}

type parseResult struct {
entries []*Entry
err error
}

func (lp *CTagsParser) Parse(name string, content []byte, typ CTagsParserType) ([]*Entry, error) {
if lp.parsers[typ] == nil {
parser, err := lp.newParserProcess(typ)
if parser == nil || err != nil {
return nil, err
}
lp.parsers[typ] = parser
}

deadline := time.NewTimer(parseTimeout)
defer deadline.Stop()

parser := lp.parsers[typ]
recv := make(chan parseResult, 1)
go func() {
entry, err := parser.Parse(name, content)
recv <- parseResult{entries: entry, err: err}
}()

select {
case resp := <-recv:
return resp.entries, resp.err
case <-deadline.C:
// Error out since ctags hanging is a sign something bad is happening.
return nil, fmt.Errorf("ctags timedout after %s parsing %s", parseTimeout, name)
}
}

func (lp *CTagsParser) newParserProcess(typ CTagsParserType) (goctags.Parser, error) {
bin := lp.bins[typ]
if bin == "" {
// This happens if CTagsMustSucceed is false and we didn't find the binary
return nil, nil
}

opts := goctags.Options{Bin: bin}
parserType := ParserToString(typ)
if debug {
opts.Info = log.New(os.Stderr, "CTAGS (" + parserType + ") INF: ", log.LstdFlags)
opts.Debug = log.New(os.Stderr, "CTAGS (" + parserType + ") DBG: ", log.LstdFlags)
}
return goctags.New(opts)
}

func (lp *CTagsParser) Close() {
for _, parser := range lp.parsers {
parser.Close()
}
}
29 changes: 6 additions & 23 deletions ctags/parser_factory.go → ctags/parser_bins.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,8 @@ package ctags
import (
"bytes"
"fmt"
"log"
"os"
"os/exec"
"strings"

goctags "github.com/sourcegraph/go-ctags"
)

type CTagsParserType uint8
Expand All @@ -34,6 +30,8 @@ const (
ScipCTags
)

const debug = false

type LanguageMap = map[string]CTagsParserType

func ParserToString(parser CTagsParserType) string {
Expand Down Expand Up @@ -64,14 +62,14 @@ func StringToParser(str string) CTagsParserType {
}
}

type ParserFactory map[CTagsParserType]string
type ParserBinMap map[CTagsParserType]string

func NewParserFactory(
func NewParserBinMap(
ctagsPath string,
scipCTagsPath string,
languageMap LanguageMap,
cTagsMustSucceed bool,
) (ParserFactory, error) {
) (ParserBinMap, error) {
validBins := make(map[CTagsParserType]string)
requiredBins := map[CTagsParserType]string{UniversalCTags: ctagsPath}
for _, parserType := range languageMap {
Expand All @@ -86,7 +84,7 @@ func NewParserFactory(
return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType))
}
if err := checkBinary(parserType, bin); err != nil && cTagsMustSucceed {
return nil, fmt.Errorf("ctags.NewParserFactory: %v", err)
return nil, fmt.Errorf("ctags.NewParserBinMap: %v", err)
}
validBins[parserType] = bin
}
Expand Down Expand Up @@ -116,18 +114,3 @@ func checkBinary(typ CTagsParserType, bin string) error {
return nil
}

// NewParser creates a parser that is implemented by the given
// ctags binary. The parser is safe for concurrent use.
func (p ParserFactory) NewParser(typ CTagsParserType) Parser {
bin := p[typ]
if bin == "" {
return nil
}

opts := goctags.Options{Bin: bin}
if debug {
opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags)
opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags)
}
return &lockedParser{opts: opts}
}
Loading

0 comments on commit 7b678e1

Please sign in to comment.