Skip to content

Commit

Permalink
Symbol grammar clarifications (#255)
Browse files Browse the repository at this point in the history
- Forbids scheme from being empty or starting with 'local'
- Unifies "any UTF-8" wording
- Specifies that identifiers that can be encoded as simple identifiers have to be encoded as simple identifiers
- Makes method_disambiguator optional

Co-authored-by: Varun Gandhi <[email protected]>
  • Loading branch information
kritzcreek and varungandhi-src authored Jun 27, 2024
1 parent 76a272e commit e7242d5
Show file tree
Hide file tree
Showing 10 changed files with 1,862 additions and 1,812 deletions.
11 changes: 6 additions & 5 deletions bindings/go/scip/scip.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 6 additions & 8 deletions bindings/go/scip/symbol.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package scip
import (
"fmt"
"strings"
"unicode"

"github.com/sourcegraph/sourcegraph/lib/errors"
)
Expand All @@ -22,10 +21,13 @@ func IsLocalSymbol(symbol string) bool {
return strings.HasPrefix(symbol, "local ")
}

// isSimpleIdentifierCharacter reports whether c is an ASCII letter, an ASCII
// digit, or one of the punctuation characters '_', '+', '-' and '$'.
// Every other rune, including non-ASCII letters and digits, yields false.
func isSimpleIdentifierCharacter(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	switch c {
	case '_', '+', '-', '$':
		return true
	}
	return false
}

func isSimpleIdentifier(s string) bool {
for _, c := range s {
if ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
c == '$' || c == '+' || c == '-' || c == '_' {
if isSimpleIdentifierCharacter(c) {
continue
}
return false
Expand Down Expand Up @@ -218,7 +220,7 @@ func (s *symbolParser) acceptIdentifier(what string) (string, error) {
return s.acceptBacktickEscapedIdentifier(what)
}
start := s.index
for s.index < len(s.Symbol) && isIdentifierCharacter(s.current()) {
for s.index < len(s.Symbol) && isSimpleIdentifierCharacter(s.current()) {
s.index++
}
if start == s.index {
Expand All @@ -227,10 +229,6 @@ func (s *symbolParser) acceptIdentifier(what string) (string, error) {
return string(s.Symbol[start:s.index]), nil
}

// isIdentifierCharacter reports whether r is a letter or digit according to
// the unicode package, or one of the punctuation characters '-', '+', '$'
// and '_'. Because it relies on unicode.IsLetter and unicode.IsDigit, it also
// accepts non-ASCII letters and digits.
func isIdentifierCharacter(r rune) bool {
	switch r {
	case '-', '+', '$', '_':
		return true
	}
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}

// acceptSpaceEscapedIdentifier delegates to acceptEscapedIdentifier, passing
// what through unchanged together with the space character ' ', and returns
// that call's result and error as-is.
func (s *symbolParser) acceptSpaceEscapedIdentifier(what string) (string, error) {
	const space = ' '
	return s.acceptEscapedIdentifier(what, space)
}
Expand Down
2 changes: 1 addition & 1 deletion bindings/go/scip/symbol_formatter.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func writeEscapedPackage(b *strings.Builder, name string) {
func writeSuffixedDescriptor(b *strings.Builder, identifier string, suffixes ...rune) {
escape := false
for _, ch := range identifier {
if !isIdentifierCharacter(ch) {
if !isSimpleIdentifierCharacter(ch) {
escape = true
break
}
Expand Down
1 change: 1 addition & 0 deletions bindings/go/scip/symbol_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ func TestParseSymbolError(t *testing.T) {
"lsif-java maven package 1.0.0",
"lsif-java maven package 1.0.0 java/io/File#Entry.trailingstring",
"lsif-java maven package 1.0.0 java/io/File#Entry.unrecognizedSuffix@",
"lsif-java maven package 1.0.0 java/io/File#Entry.nonSimpλeIdentifier.",
"local 🧠",
"local ",
"local &&&",
Expand Down
Loading

0 comments on commit e7242d5

Please sign in to comment.