Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Indexing: clean up ctags parser wrapper #708

Merged
merged 9 commits into from
Dec 6, 2023
10 changes: 5 additions & 5 deletions build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ type Builder struct {
docChecker zoekt.DocChecker
size int

parserFactory ctags.ParserFactory
building sync.WaitGroup
parserBins ctags.ParserBinMap
building sync.WaitGroup

errMu sync.Mutex
buildError error
Expand Down Expand Up @@ -563,7 +563,7 @@ func NewBuilder(opts Options) (*Builder, error) {
finishedShards: map[string]string{},
}

parserFactory, err := ctags.NewParserFactory(
parserBins, err := ctags.NewParserBinMap(
b.opts.CTagsPath,
b.opts.ScipCTagsPath,
opts.LanguageMap,
Expand All @@ -573,7 +573,7 @@ func NewBuilder(opts Options) (*Builder, error) {
return nil, err
}

b.parserFactory = parserFactory
b.parserBins = parserBins

b.shardLogger = &lumberjack.Logger{
Filename: filepath.Join(opts.IndexDir, "zoekt-builder-shard-log.tsv"),
Expand Down Expand Up @@ -1018,7 +1018,7 @@ func sortDocuments(todo []*zoekt.Document) {

func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) {
if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") {
err := parseSymbols(todo, b.opts.LanguageMap, b.parserFactory)
err := parseSymbols(todo, b.opts.LanguageMap, b.parserBins)
if b.opts.CTagsMustSucceed && err != nil {
return nil, err
}
Expand Down
29 changes: 9 additions & 20 deletions build/ctags.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ func normalizeLanguage(filetype string) string {
return normalized
}

func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserFactory ctags.ParserFactory) error {
func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error {
monitor := newMonitor()
defer monitor.Stop()

var tagsToSections tagsToSections

parsers := make(map[ctags.CTagsParserType]ctags.Parser)
parser := ctags.NewCTagsParser(parserBins)
defer parser.Close()

for _, doc := range todo {
if len(doc.Content) == 0 || doc.Symbols != nil {
Expand All @@ -57,30 +58,18 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserF

zoekt.DetermineLanguageIfUnknown(doc)

parserKind := languageMap[normalizeLanguage(doc.Language)]
if parserKind == ctags.NoCTags {
parserType := languageMap[normalizeLanguage(doc.Language)]
if parserType == ctags.NoCTags {
continue
}

// If the parser kind is unknown, default to universal-ctags
if parserKind == ctags.UnknownCTags {
parserKind = ctags.UniversalCTags
}

parser := parsers[parserKind]
if parser == nil {
// Spin up a new parser for this parser kind
parser = parserFactory.NewParser(parserKind)
if parser == nil {
// this happens if CTagsMustSucceed is false and we didn't find the binary
continue
}
parsers[parserKind] = parser
defer parser.Close()
// If the parser type is unknown, default to universal-ctags
if parserType == ctags.UnknownCTags {
parserType = ctags.UniversalCTags
}

monitor.BeginParsing(doc)
es, err := parser.Parse(doc.Name, doc.Content)
es, err := parser.Parse(doc.Name, doc.Content, parserType)
monitor.EndParsing(es)

if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions build/ctags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,13 @@ func BenchmarkTagsToSections(b *testing.B) {
b.Fatal(err)
}

factory, err := ctags.NewParserFactory("universal-ctags", "", ctags.LanguageMap{}, true)
bins, err := ctags.NewParserBinMap("universal-ctags", "", ctags.LanguageMap{}, true)
if err != nil {
b.Fatal(err)
}

parser := factory.NewParser(ctags.UniversalCTags)
entries, err := parser.Parse("./testdata/large_file.cc", file)
parser := ctags.NewCTagsParser(bins)
entries, err := parser.Parse("./testdata/large_file.cc", file, ctags.UniversalCTags)
if err != nil {
b.Fatal(err)
}
Expand Down
53 changes: 36 additions & 17 deletions ctags/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,55 @@
package ctags

import (
"log"
"os"

goctags "github.com/sourcegraph/go-ctags"
)

const debug = false

type Parser = goctags.Parser
type Entry = goctags.Entry

type lockedParser struct {
opts goctags.Options
p Parser
// CTagsParser wraps go-ctags and delegates to the right process (like universal-ctags or scip-ctags).
// It is only safe for single-threaded use.
type CTagsParser struct {
bins ParserBinMap
parsers map[CTagsParserType]goctags.Parser
}

// Parse wraps go-ctags Parse and lazily starts the process.
func (lp *lockedParser) Parse(name string, content []byte) ([]*Entry, error) {
if lp.p == nil {
p, err := goctags.New(lp.opts)
func NewCTagsParser(bins ParserBinMap) CTagsParser {
return CTagsParser{bins: bins, parsers: make(map[CTagsParserType]goctags.Parser)}
}

func (lp *CTagsParser) Parse(name string, content []byte, typ CTagsParserType) ([]*Entry, error) {
if lp.parsers[typ] == nil {
parser, err := lp.newCTagsParser(typ)
if err != nil {
return nil, err
}
lp.p = p
lp.parsers[typ] = parser
}

return lp.p.Parse(name, content)
parser := lp.parsers[typ]
return parser.Parse(name, content)
}

func (lp *CTagsParser) newCTagsParser(typ CTagsParserType) (goctags.Parser, error) {
bin := lp.bins[typ]
if bin == "" {
// This happens if CTagsMustSucceed is false and we didn't find the binary
return nil, nil
}

opts := goctags.Options{Bin: bin}
if debug {
opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags)
opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags)
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved
}
return goctags.New(opts)
}

func (lp *lockedParser) Close() {
if lp.p == nil {
return
func (lp *CTagsParser) Close() {
for _, parser := range lp.parsers {
parser.Close()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the old implementation protected from calling lp.p.Close twice. Not sure if that is needed given Close likely can be called twice. Just checking :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding: because of how parser.Close() is implemented, it is okay to call twice. I just tested this by adding an extra call to parser.Close() during indexing and it was fine. Also, we do try to structure things so that parser.Close() should ever only be called once.

}
lp.p.Close()
lp.p = nil
}
9 changes: 2 additions & 7 deletions ctags/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,7 @@ func TestJSON(t *testing.T) {
t.Skip(err)
}

factory, err := NewParserFactory("universal-ctags", "", LanguageMap{}, true)
if err != nil {
t.Fatal(err)
}

p := factory.NewParser(UniversalCTags)
p := NewCTagsParser(map[CTagsParserType]string{UniversalCTags: "universal-ctags"})
defer p.Close()

java := `
Expand All @@ -50,7 +45,7 @@ class Back implements Future extends Frob {
}
`
name := "io/zoekt/Back.java"
got, err := p.Parse(name, []byte(java))
got, err := p.Parse(name, []byte(java), UniversalCTags)
if err != nil {
t.Errorf("Process: %v", err)
}
Expand Down
29 changes: 6 additions & 23 deletions ctags/parser_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,8 @@ package ctags
import (
"bytes"
"fmt"
"log"
"os"
"os/exec"
"strings"

goctags "github.com/sourcegraph/go-ctags"
)

type CTagsParserType uint8
Expand All @@ -34,6 +30,8 @@ const (
ScipCTags
)

const debug = false

type LanguageMap = map[string]CTagsParserType

func ParserToString(parser CTagsParserType) string {
Expand Down Expand Up @@ -64,14 +62,14 @@ func StringToParser(str string) CTagsParserType {
}
}

type ParserFactory map[CTagsParserType]string
type ParserBinMap map[CTagsParserType]string

func NewParserFactory(
func NewParserBinMap(
ctagsPath string,
scipCTagsPath string,
languageMap LanguageMap,
cTagsMustSucceed bool,
) (ParserFactory, error) {
) (ParserBinMap, error) {
validBins := make(map[CTagsParserType]string)
requiredBins := map[CTagsParserType]string{UniversalCTags: ctagsPath}
for _, parserType := range languageMap {
Expand All @@ -86,7 +84,7 @@ func NewParserFactory(
return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType))
}
if err := checkBinary(parserType, bin); err != nil && cTagsMustSucceed {
return nil, fmt.Errorf("ctags.NewParserFactory: %v", err)
return nil, fmt.Errorf("ctags.NewParserBinMap: %v", err)
}
validBins[parserType] = bin
}
Expand Down Expand Up @@ -116,18 +114,3 @@ func checkBinary(typ CTagsParserType, bin string) error {
return nil
}

// NewParser creates a parser that is implemented by the given
// ctags binary. The parser is safe for concurrent use.
func (p ParserFactory) NewParser(typ CTagsParserType) Parser {
bin := p[typ]
if bin == "" {
return nil
}

opts := goctags.Options{Bin: bin}
if debug {
opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags)
opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags)
}
return &lockedParser{opts: opts}
}