Skip to content

Commit

Permalink
cgo: support function-like macros
Browse files Browse the repository at this point in the history
This is needed for code like this:

    #define __WASI_ERRNO_INVAL (UINT16_C(28))
    #define EINVAL __WASI_ERRNO_INVAL
  • Loading branch information
aykevl authored and deadprogram committed Nov 18, 2024
1 parent c4867c8 commit e12da15
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 46 deletions.
128 changes: 122 additions & 6 deletions cgo/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,72 @@ func init() {
}

// parseConst parses the given string as a C constant.
func parseConst(pos token.Pos, fset *token.FileSet, value string, f *cgoFile) (ast.Expr, *scanner.Error) {
func parseConst(pos token.Pos, fset *token.FileSet, value string, params []ast.Expr, callerPos token.Pos, f *cgoFile) (ast.Expr, *scanner.Error) {
t := newTokenizer(pos, fset, value, f)

// If params is non-nil (could be a zero length slice), this const is
// actually a function-call like expression from another macro.
// This means we have to parse a string like "(a, b) (a+b)".
// We do this by parsing the parameters at the start and then treating the
// following like a normal constant expression.
if params != nil {
// Parse opening paren.
if t.curToken != token.LPAREN {
return nil, unexpectedToken(t, token.LPAREN)
}
t.Next()

// Parse parameters (identifiers) and closing paren.
var paramIdents []string
for i := 0; ; i++ {
if i == 0 && t.curToken == token.RPAREN {
// No parameters, break early.
t.Next()
break
}

// Read the parameter name.
if t.curToken != token.IDENT {
return nil, unexpectedToken(t, token.IDENT)
}
paramIdents = append(paramIdents, t.curValue)
t.Next()

// Read the next token: either a continuation (comma) or end of list
// (rparen).
if t.curToken == token.RPAREN {
// End of parameter list.
t.Next()
break
} else if t.curToken == token.COMMA {
// Comma, so there will be another parameter name.
t.Next()
} else {
return nil, &scanner.Error{
Pos: t.fset.Position(t.curPos),
Msg: "unexpected token " + t.curToken.String() + " inside macro parameters, expected ',' or ')'",
}
}
}

// Report an error if there is a mismatch in parameter length.
// The error is reported at the location of the closing paren from the
// caller location.
if len(params) != len(paramIdents) {
return nil, &scanner.Error{
Pos: t.fset.Position(callerPos),
Msg: fmt.Sprintf("unexpected number of parameters: expected %d, got %d", len(paramIdents), len(params)),
}
}

// Assign values to the parameters.
// These parameter names are closer in 'scope' than other identifiers so
// will be used first when parsing an identifier.
for i, name := range paramIdents {
t.params[name] = params[i]
}
}

expr, err := parseConstExpr(t, precedenceLowest)
t.Next()
if t.curToken != token.EOF {
Expand Down Expand Up @@ -96,11 +160,59 @@ func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
}

func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
// Normally the name is something defined in the file (like another macro)
// which we get the declaration from using getASTDeclName.
// This ensures that names that are only referenced inside a macro are still
// getting defined.
// If the identifier is one of the parameters of this function-like macro,
// use the parameter value.
if val, ok := t.params[t.curValue]; ok {
return val, nil
}

if t.f != nil {
// Check whether this identifier is actually a macro "call" with
// parameters. In that case, we should parse the parameters and pass it
// on to a new invocation of parseConst.
if t.peekToken == token.LPAREN {
if cursor, ok := t.f.names[t.curValue]; ok && t.f.isFunctionLikeMacro(cursor) {
// We know the current and peek tokens (the peek one is the '('
// token). So skip ahead until the current token is the first
// unknown token.
t.Next()
t.Next()

// Parse the list of parameters until ')' (rparen) is found.
params := []ast.Expr{}
for i := 0; ; i++ {
if i == 0 && t.curToken == token.RPAREN {
break
}
x, err := parseConstExpr(t, precedenceLowest)
if err != nil {
return nil, err
}
params = append(params, x)
t.Next()
if t.curToken == token.COMMA {
t.Next()
} else if t.curToken == token.RPAREN {
break
} else {
return nil, &scanner.Error{
Pos: t.fset.Position(t.curPos),
Msg: "unexpected token " + t.curToken.String() + ", ',' or ')'",
}
}
}

// Evaluate the macro value and use it as the identifier value.
rparen := t.curPos
pos, text := t.f.getMacro(cursor)
return parseConst(pos, t.fset, text, params, rparen, t.f)
}
}

// Normally the name is something defined in the file (like another
// macro) which we get the declaration from using getASTDeclName.
// This ensures that names that are only referenced inside a macro are
// still getting defined.
if cursor, ok := t.f.names[t.curValue]; ok {
return &ast.Ident{
NamePos: t.curPos,
Expand Down Expand Up @@ -184,6 +296,7 @@ type tokenizer struct {
curToken, peekToken token.Token
curValue, peekValue string
buf string
params map[string]ast.Expr
}

// newTokenizer initializes a new tokenizer, positioned at the first token in
Expand All @@ -195,6 +308,7 @@ func newTokenizer(start token.Pos, fset *token.FileSet, buf string, f *cgoFile)
fset: fset,
buf: buf,
peekToken: token.ILLEGAL,
params: make(map[string]ast.Expr),
}
// Parse the first two tokens (cur and peek).
t.Next()
Expand Down Expand Up @@ -246,14 +360,16 @@ func (t *tokenizer) Next() {
t.peekValue = t.buf[:2]
t.buf = t.buf[2:]
return
case c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
case c == '(' || c == ')' || c == ',' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
// Single-character tokens.
// TODO: ++ (increment) and -- (decrement) operators.
switch c {
case '(':
t.peekToken = token.LPAREN
case ')':
t.peekToken = token.RPAREN
case ',':
t.peekToken = token.COMMA
case '+':
t.peekToken = token.ADD
case '-':
Expand Down
2 changes: 1 addition & 1 deletion cgo/const_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TestParseConst(t *testing.T) {
} {
fset := token.NewFileSet()
startPos := fset.AddFile("", -1, 1000).Pos(0)
expr, err := parseConst(startPos, fset, tc.C, nil)
expr, err := parseConst(startPos, fset, tc.C, nil, token.NoPos, nil)
s := "<invalid>"
if err != nil {
if !strings.HasPrefix(tc.Go, "error: ") {
Expand Down
98 changes: 59 additions & 39 deletions cgo/libclang.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ long long tinygo_clang_getEnumConstantDeclValue(GoCXCursor c);
CXType tinygo_clang_getEnumDeclIntegerType(GoCXCursor c);
unsigned tinygo_clang_Cursor_isAnonymous(GoCXCursor c);
unsigned tinygo_clang_Cursor_isBitField(GoCXCursor c);
unsigned tinygo_clang_Cursor_isMacroFunctionLike(GoCXCursor c);
int tinygo_clang_globals_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
int tinygo_clang_struct_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
Expand Down Expand Up @@ -370,45 +371,8 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
gen.Specs = append(gen.Specs, valueSpec)
return gen, nil
case C.CXCursor_MacroDefinition:
// Extract tokens from the Clang tokenizer.
// See: https://stackoverflow.com/a/19074846/559350
sourceRange := C.tinygo_clang_getCursorExtent(c)
tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
var rawTokens *C.CXToken
var numTokens C.unsigned
C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens)
tokens := unsafe.Slice(rawTokens, numTokens)
// Convert this range of tokens back to source text.
// Ugly, but it works well enough.
sourceBuf := &bytes.Buffer{}
var startOffset int
for i, token := range tokens {
spelling := getString(C.clang_getTokenSpelling(tu, token))
location := C.clang_getTokenLocation(tu, token)
var tokenOffset C.unsigned
C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset)
if i == 0 {
// The first token is the macro name itself.
// Skip it (after using its location).
startOffset = int(tokenOffset) + len(name)
} else {
// Later tokens are the macro contents.
for int(tokenOffset) > (startOffset + sourceBuf.Len()) {
// Pad the source text with whitespace (that must have been
// present in the original source as well).
sourceBuf.WriteByte(' ')
}
sourceBuf.WriteString(spelling)
}
}
C.clang_disposeTokens(tu, rawTokens, numTokens)
value := sourceBuf.String()
// Try to convert this #define into a Go constant expression.
tokenPos := token.NoPos
if pos != token.NoPos {
tokenPos = pos + token.Pos(len(name))
}
expr, scannerError := parseConst(tokenPos, f.fset, value, f)
tokenPos, value := f.getMacro(c)
expr, scannerError := parseConst(tokenPos, f.fset, value, nil, token.NoPos, f)
if scannerError != nil {
f.errors = append(f.errors, *scannerError)
return nil, nil
Expand Down Expand Up @@ -488,6 +452,62 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
}
}

// isFunctionLikeMacro reports whether the cursor c refers to a macro
// definition that takes a parameter list, for example:
//
//	#define add(a, b) (a+b)
//
// Cursors of any other kind yield false.
func (f *cgoFile) isFunctionLikeMacro(c clangCursor) bool {
	isMacroDef := C.tinygo_clang_getCursorKind(c) == C.CXCursor_MacroDefinition
	// The && short-circuits, so libclang is only asked about function-likeness
	// when the cursor really is a macro definition.
	return isMacroDef && C.tinygo_clang_Cursor_isMacroFunctionLike(c) != 0
}

// getMacro reconstructs the source text of the macro definition at cursor c.
//
// The returned value is the spelling of every token after the first one (the
// macro name), with spaces inserted so that each token sits at the same byte
// offset, relative to the start of the macro name, as its expansion location
// in the original source. The returned pos comes from getCursorPosition(c)
// and is meant to line up with the first byte of value, so that errors found
// while parsing value are reported at the right place in the source file.
func (f *cgoFile) getMacro(c clangCursor) (pos token.Pos, value string) {
	// Ask Clang to tokenize the extent covered by the cursor.
	// See: https://stackoverflow.com/a/19074846/559350
	extent := C.tinygo_clang_getCursorExtent(c)
	tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
	var (
		rawTokens *C.CXToken
		numTokens C.unsigned
	)
	C.clang_tokenize(tu, extent, &rawTokens, &numTokens)
	tokens := unsafe.Slice(rawTokens, numTokens)
	defer C.clang_disposeTokens(tu, rawTokens, numTokens)

	// Stitch the tokens back together into source text.
	// Not pretty, but good enough for constant expressions.
	var text bytes.Buffer
	nameOffset := 0
	for i, tok := range tokens {
		spelling := getString(C.clang_getTokenSpelling(tu, tok))
		loc := C.clang_getTokenLocation(tu, tok)
		var offset C.unsigned
		C.clang_getExpansionLocation(loc, nil, nil, nil, &offset)

		if i == 0 {
			// Token 0 is the macro name: remember where it starts but do not
			// emit its spelling.
			nameOffset = int(offset)
			continue
		}

		// Fill the gap up to this token with spaces, standing in for the
		// name and whitespace that precede it in the original source.
		for pad := int(offset) - (nameOffset + text.Len()); pad > 0; pad-- {
			text.WriteByte(' ')
		}
		text.WriteString(spelling)
	}
	value = text.String()

	// Position used as the origin for error locations within value.
	pos = f.getCursorPosition(c)

	return pos, value
}

func getString(clangString C.CXString) (s string) {
rawString := C.clang_getCString(clangString)
s = C.GoString(rawString)
Expand Down
4 changes: 4 additions & 0 deletions cgo/libclang_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,7 @@ unsigned tinygo_clang_Cursor_isAnonymous(CXCursor c) {
unsigned tinygo_clang_Cursor_isBitField(CXCursor c) {
return clang_Cursor_isBitField(c);
}

// Forwarding stub for clang_Cursor_isMacroFunctionLike: passes the cursor
// through and returns libclang's result unchanged. The Go caller treats any
// non-zero result as "this macro is function-like".
unsigned tinygo_clang_Cursor_isMacroFunctionLike(CXCursor c) {
return clang_Cursor_isMacroFunctionLike(c);
}
13 changes: 13 additions & 0 deletions cgo/testdata/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,26 @@ package main
/*
#define foo 3
#define bar foo
#define unreferenced 4
#define referenced unreferenced
#define fnlike() 5
#define fnlike_val fnlike()
#define square(n) (n*n)
#define square_val square(20)
#define add(a, b) (a + b)
#define add_val add(3, 5)
*/
import "C"

const (
Foo = C.foo
Bar = C.bar

Baz = C.referenced

fnlike = C.fnlike_val
square = C.square_val
add = C.add_val
)
3 changes: 3 additions & 0 deletions cgo/testdata/const.out.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,6 @@ const C.foo = 3
const C.bar = C.foo
const C.unreferenced = 4
const C.referenced = C.unreferenced
const C.fnlike_val = 5
const C.square_val = (20 * 20)
const C.add_val = (3 + 5)
8 changes: 8 additions & 0 deletions cgo/testdata/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ import "C"
// #warning another warning
import "C"

// #define add(a, b) (a+b)
// #define add_toomuch add(1, 2, 3)
// #define add_toolittle add(1)
import "C"

// Make sure that errors for the following lines won't change with future
// additions to the CGo preamble.
//
Expand All @@ -51,4 +56,7 @@ var (
// constants passed by a command line parameter
_ = C.SOME_PARAM_CONST_invalid
_ = C.SOME_PARAM_CONST_valid

_ = C.add_toomuch
_ = C.add_toolittle
)
4 changes: 4 additions & 0 deletions cgo/testdata/errors.out.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
// testdata/errors.go:16:33: unexpected token ), expected end of expression
// testdata/errors.go:17:34: unexpected token ), expected end of expression
// -: unexpected token INT, expected end of expression
// testdata/errors.go:30:35: unexpected number of parameters: expected 2, got 3
// testdata/errors.go:31:31: unexpected number of parameters: expected 2, got 1

// Type checking errors after CGo processing:
// testdata/errors.go:102: cannot use 2 << 10 (untyped int constant 2048) as C.char value in variable declaration (overflows)
Expand All @@ -17,6 +19,8 @@
// testdata/errors.go:114: undefined: C.SOME_CONST_b
// testdata/errors.go:116: undefined: C.SOME_CONST_startspace
// testdata/errors.go:119: undefined: C.SOME_PARAM_CONST_invalid
// testdata/errors.go:122: undefined: C.add_toomuch
// testdata/errors.go:123: undefined: C.add_toolittle

package main

Expand Down

0 comments on commit e12da15

Please sign in to comment.