From e12da15f7d230874ca95a257c50bca0fc39f262c Mon Sep 17 00:00:00 2001
From: Ayke van Laethem
Date: Wed, 6 Nov 2024 13:46:49 +0100
Subject: [PATCH] cgo: support function-like macros

This is needed for code like this:

    #define __WASI_ERRNO_INVAL (UINT16_C(28))
    #define EINVAL __WASI_ERRNO_INVAL
---
 cgo/const.go               | 128 +++++++++++++++++++++++++++++++++++--
 cgo/const_test.go          |   2 +-
 cgo/libclang.go            |  98 +++++++++++++++++-----------
 cgo/libclang_stubs.c       |   4 ++
 cgo/testdata/const.go      |  13 ++++
 cgo/testdata/const.out.go  |   3 +
 cgo/testdata/errors.go     |   8 +++
 cgo/testdata/errors.out.go |   4 ++
 8 files changed, 214 insertions(+), 46 deletions(-)

diff --git a/cgo/const.go b/cgo/const.go
index ab088b3c80..9e7b06b4de 100644
--- a/cgo/const.go
+++ b/cgo/const.go
@@ -54,8 +54,72 @@ func init() {
 }
 
 // parseConst parses the given string as a C constant.
-func parseConst(pos token.Pos, fset *token.FileSet, value string, f *cgoFile) (ast.Expr, *scanner.Error) {
+func parseConst(pos token.Pos, fset *token.FileSet, value string, params []ast.Expr, callerPos token.Pos, f *cgoFile) (ast.Expr, *scanner.Error) {
 	t := newTokenizer(pos, fset, value, f)
+
+	// If params is non-nil (could be a zero length slice), this const is
+	// actually a function-call-like expression from another macro.
+	// This means we have to parse a string like "(a, b) (a+b)".
+	// We do this by parsing the parameters at the start and then treating the
+	// following like a normal constant expression.
+	if params != nil {
+		// Parse opening paren.
+		if t.curToken != token.LPAREN {
+			return nil, unexpectedToken(t, token.LPAREN)
+		}
+		t.Next()
+
+		// Parse parameters (identifiers) and closing paren.
+		var paramIdents []string
+		for i := 0; ; i++ {
+			if i == 0 && t.curToken == token.RPAREN {
+				// No parameters, break early.
+				t.Next()
+				break
+			}
+
+			// Read the parameter name.
+			if t.curToken != token.IDENT {
+				return nil, unexpectedToken(t, token.IDENT)
+			}
+			paramIdents = append(paramIdents, t.curValue)
+			t.Next()
+
+			// Read the next token: either a continuation (comma) or end of list
+			// (rparen).
+			if t.curToken == token.RPAREN {
+				// End of parameter list.
+				t.Next()
+				break
+			} else if t.curToken == token.COMMA {
+				// Comma, so there will be another parameter name.
+				t.Next()
+			} else {
+				return nil, &scanner.Error{
+					Pos: t.fset.Position(t.curPos),
+					Msg: "unexpected token " + t.curToken.String() + " inside macro parameters, expected ',' or ')'",
+				}
+			}
+		}
+
+		// Report an error if there is a mismatch in parameter length.
+		// The error is reported at the location of the closing paren from the
+		// caller location.
+		if len(params) != len(paramIdents) {
+			return nil, &scanner.Error{
+				Pos: t.fset.Position(callerPos),
+				Msg: fmt.Sprintf("unexpected number of parameters: expected %d, got %d", len(paramIdents), len(params)),
+			}
+		}
+
+		// Assign values to the parameters.
+		// These parameter names are closer in 'scope' than other identifiers so
+		// will be used first when parsing an identifier.
+		for i, name := range paramIdents {
+			t.params[name] = params[i]
+		}
+	}
+
 	expr, err := parseConstExpr(t, precedenceLowest)
 	t.Next()
 	if t.curToken != token.EOF {
@@ -96,11 +160,59 @@ func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
 }
 
 func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
-	// Normally the name is something defined in the file (like another macro)
-	// which we get the declaration from using getASTDeclName.
-	// This ensures that names that are only referenced inside a macro are still
-	// getting defined.
+	// If the identifier is one of the parameters of this function-like macro,
+	// use the parameter value.
+	if val, ok := t.params[t.curValue]; ok {
+		return val, nil
+	}
+
 	if t.f != nil {
+		// Check whether this identifier is actually a macro "call" with
+		// parameters. In that case, we should parse the parameters and pass them
+		// on to a new invocation of parseConst.
+		if t.peekToken == token.LPAREN {
+			if cursor, ok := t.f.names[t.curValue]; ok && t.f.isFunctionLikeMacro(cursor) {
+				// We know the current and peek tokens (the peek one is the '('
+				// token). So skip ahead until the current token is the first
+				// unknown token.
+				t.Next()
+				t.Next()
+
+				// Parse the list of parameters until ')' (rparen) is found.
+				params := []ast.Expr{}
+				for i := 0; ; i++ {
+					if i == 0 && t.curToken == token.RPAREN {
+						break
+					}
+					x, err := parseConstExpr(t, precedenceLowest)
+					if err != nil {
+						return nil, err
+					}
+					params = append(params, x)
+					t.Next()
+					if t.curToken == token.COMMA {
+						t.Next()
+					} else if t.curToken == token.RPAREN {
+						break
+					} else {
+						return nil, &scanner.Error{
+							Pos: t.fset.Position(t.curPos),
+							Msg: "unexpected token " + t.curToken.String() + ", expected ',' or ')'",
+						}
+					}
+				}
+
+				// Evaluate the macro value and use it as the identifier value.
+				rparen := t.curPos
+				pos, text := t.f.getMacro(cursor)
+				return parseConst(pos, t.fset, text, params, rparen, t.f)
+			}
+		}
+
+		// Normally the name is something defined in the file (like another
+		// macro) which we get the declaration from using getASTDeclName.
+		// This ensures that names that are only referenced inside a macro are
+		// still getting defined.
 		if cursor, ok := t.f.names[t.curValue]; ok {
 			return &ast.Ident{
 				NamePos: t.curPos,
@@ -184,6 +296,7 @@ type tokenizer struct {
 	curToken, peekToken token.Token
 	curValue, peekValue string
 	buf                 string
+	params              map[string]ast.Expr
 }
 
 // newTokenizer initializes a new tokenizer, positioned at the first token in
@@ -195,6 +308,7 @@ func newTokenizer(start token.Pos, fset *token.FileSet, buf string, f *cgoFile)
 		fset:      fset,
 		buf:       buf,
 		peekToken: token.ILLEGAL,
+		params:    make(map[string]ast.Expr),
 	}
 	// Parse the first two tokens (cur and peek).
 	t.Next()
@@ -246,7 +360,7 @@ func (t *tokenizer) Next() {
 			t.peekValue = t.buf[:2]
 			t.buf = t.buf[2:]
 			return
-		case c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
+		case c == '(' || c == ')' || c == ',' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
 			// Single-character tokens.
 			// TODO: ++ (increment) and -- (decrement) operators.
 			switch c {
@@ -254,6 +368,8 @@ func (t *tokenizer) Next() {
 				t.peekToken = token.LPAREN
 			case ')':
 				t.peekToken = token.RPAREN
+			case ',':
+				t.peekToken = token.COMMA
 			case '+':
 				t.peekToken = token.ADD
 			case '-':
diff --git a/cgo/const_test.go b/cgo/const_test.go
index c2f52c53c5..b87f8063a4 100644
--- a/cgo/const_test.go
+++ b/cgo/const_test.go
@@ -59,7 +59,7 @@ func TestParseConst(t *testing.T) {
 	} {
 		fset := token.NewFileSet()
 		startPos := fset.AddFile("", -1, 1000).Pos(0)
-		expr, err := parseConst(startPos, fset, tc.C, nil)
+		expr, err := parseConst(startPos, fset, tc.C, nil, token.NoPos, nil)
 		s := ""
 		if err != nil {
 			if !strings.HasPrefix(tc.Go, "error: ") {
diff --git a/cgo/libclang.go b/cgo/libclang.go
index c66112d53e..794d4e81f1 100644
--- a/cgo/libclang.go
+++ b/cgo/libclang.go
@@ -63,6 +63,7 @@ long long tinygo_clang_getEnumConstantDeclValue(GoCXCursor c);
 CXType tinygo_clang_getEnumDeclIntegerType(GoCXCursor c);
 unsigned tinygo_clang_Cursor_isAnonymous(GoCXCursor c);
 unsigned tinygo_clang_Cursor_isBitField(GoCXCursor c);
+unsigned tinygo_clang_Cursor_isMacroFunctionLike(GoCXCursor c);
 
 int tinygo_clang_globals_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
 int tinygo_clang_struct_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
@@ -370,45 +371,8 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
 		gen.Specs = append(gen.Specs, valueSpec)
 		return gen, nil
 	case C.CXCursor_MacroDefinition:
-		// Extract tokens from the Clang tokenizer.
-		// See: https://stackoverflow.com/a/19074846/559350
-		sourceRange := C.tinygo_clang_getCursorExtent(c)
-		tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
-		var rawTokens *C.CXToken
-		var numTokens C.unsigned
-		C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens)
-		tokens := unsafe.Slice(rawTokens, numTokens)
-		// Convert this range of tokens back to source text.
-		// Ugly, but it works well enough.
-		sourceBuf := &bytes.Buffer{}
-		var startOffset int
-		for i, token := range tokens {
-			spelling := getString(C.clang_getTokenSpelling(tu, token))
-			location := C.clang_getTokenLocation(tu, token)
-			var tokenOffset C.unsigned
-			C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset)
-			if i == 0 {
-				// The first token is the macro name itself.
-				// Skip it (after using its location).
-				startOffset = int(tokenOffset) + len(name)
-			} else {
-				// Later tokens are the macro contents.
-				for int(tokenOffset) > (startOffset + sourceBuf.Len()) {
-					// Pad the source text with whitespace (that must have been
-					// present in the original source as well).
-					sourceBuf.WriteByte(' ')
-				}
-				sourceBuf.WriteString(spelling)
-			}
-		}
-		C.clang_disposeTokens(tu, rawTokens, numTokens)
-		value := sourceBuf.String()
-		// Try to convert this #define into a Go constant expression.
-		tokenPos := token.NoPos
-		if pos != token.NoPos {
-			tokenPos = pos + token.Pos(len(name))
-		}
-		expr, scannerError := parseConst(tokenPos, f.fset, value, f)
+		tokenPos, value := f.getMacro(c)
+		expr, scannerError := parseConst(tokenPos, f.fset, value, nil, token.NoPos, f)
 		if scannerError != nil {
 			f.errors = append(f.errors, *scannerError)
 			return nil, nil
@@ -488,6 +452,62 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
 	}
 }
 
+// Return whether this is a macro that's also function-like, like this:
+//
+//	#define add(a, b) (a+b)
+func (f *cgoFile) isFunctionLikeMacro(c clangCursor) bool {
+	if C.tinygo_clang_getCursorKind(c) != C.CXCursor_MacroDefinition {
+		return false
+	}
+	return C.tinygo_clang_Cursor_isMacroFunctionLike(c) != 0
+}
+
+// Get the macro value: the position in the source file and the string value of
+// the macro.
+func (f *cgoFile) getMacro(c clangCursor) (pos token.Pos, value string) {
+	// Extract tokens from the Clang tokenizer.
+	// See: https://stackoverflow.com/a/19074846/559350
+	sourceRange := C.tinygo_clang_getCursorExtent(c)
+	tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
+	var rawTokens *C.CXToken
+	var numTokens C.unsigned
+	C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens)
+	tokens := unsafe.Slice(rawTokens, numTokens)
+	defer C.clang_disposeTokens(tu, rawTokens, numTokens)
+
+	// Convert this range of tokens back to source text.
+	// Ugly, but it works well enough.
+	sourceBuf := &bytes.Buffer{}
+	var startOffset int
+	for i, token := range tokens {
+		spelling := getString(C.clang_getTokenSpelling(tu, token))
+		location := C.clang_getTokenLocation(tu, token)
+		var tokenOffset C.unsigned
+		C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset)
+		if i == 0 {
+			// The first token is the macro name itself.
+			// Skip it (after using its location).
+			startOffset = int(tokenOffset)
+		} else {
+			// Later tokens are the macro contents.
+			for int(tokenOffset) > (startOffset + sourceBuf.Len()) {
+				// Pad the source text with whitespace (that must have been
+				// present in the original source as well).
+				sourceBuf.WriteByte(' ')
+			}
+			sourceBuf.WriteString(spelling)
+		}
+	}
+	value = sourceBuf.String()
+
+	// Obtain the position of this token. This is the position of the first
+	// character in the 'value' string and is used to report errors at the
+	// correct location in the source file.
+	pos = f.getCursorPosition(c)
+
+	return
+}
+
 func getString(clangString C.CXString) (s string) {
 	rawString := C.clang_getCString(clangString)
 	s = C.GoString(rawString)
diff --git a/cgo/libclang_stubs.c b/cgo/libclang_stubs.c
index 1b157d0aa7..e8098fac09 100644
--- a/cgo/libclang_stubs.c
+++ b/cgo/libclang_stubs.c
@@ -84,3 +84,7 @@ unsigned tinygo_clang_Cursor_isAnonymous(CXCursor c) {
 unsigned tinygo_clang_Cursor_isBitField(CXCursor c) {
 	return clang_Cursor_isBitField(c);
 }
+
+unsigned tinygo_clang_Cursor_isMacroFunctionLike(CXCursor c) {
+	return clang_Cursor_isMacroFunctionLike(c);
+}
diff --git a/cgo/testdata/const.go b/cgo/testdata/const.go
index 2589422351..d5a7dfd396 100644
--- a/cgo/testdata/const.go
+++ b/cgo/testdata/const.go
@@ -3,13 +3,26 @@ package main
 /*
 #define foo 3
 #define bar foo
+
 #define unreferenced 4
 #define referenced unreferenced
+
+#define fnlike() 5
+#define fnlike_val fnlike()
+#define square(n) (n*n)
+#define square_val square(20)
+#define add(a, b) (a + b)
+#define add_val add(3, 5)
 */
 import "C"
 
 const (
 	Foo = C.foo
 	Bar = C.bar
+
 	Baz = C.referenced
+
+	fnlike = C.fnlike_val
+	square = C.square_val
+	add    = C.add_val
 )
diff --git a/cgo/testdata/const.out.go b/cgo/testdata/const.out.go
index fb0bbeeba2..e7ee15380a 100644
--- a/cgo/testdata/const.out.go
+++ b/cgo/testdata/const.out.go
@@ -49,3 +49,6 @@ const C.foo = 3
 const C.bar = C.foo
 const C.unreferenced = 4
 const C.referenced = C.unreferenced
+const C.fnlike_val = 5
+const C.square_val = (20 * 20)
+const C.add_val = (3 + 5)
diff --git a/cgo/testdata/errors.go b/cgo/testdata/errors.go
index e5e809881f..75828ce0f1 100644
--- a/cgo/testdata/errors.go
+++ b/cgo/testdata/errors.go
@@ -26,6 +26,11 @@ import "C"
 // #warning another warning
 import "C"
 
+// #define add(a, b) (a+b)
+// #define add_toomuch add(1, 2, 3)
+// #define add_toolittle add(1)
+import "C"
+
 // Make sure that errors for the following lines won't change with future
 // additions to the CGo preamble.
 //
@@ -51,4 +56,7 @@ var (
 	// constants passed by a command line parameter
 	_ = C.SOME_PARAM_CONST_invalid
 	_ = C.SOME_PARAM_CONST_valid
+
+	_ = C.add_toomuch
+	_ = C.add_toolittle
 )
diff --git a/cgo/testdata/errors.out.go b/cgo/testdata/errors.out.go
index 43a6a65c97..baadba68d2 100644
--- a/cgo/testdata/errors.out.go
+++ b/cgo/testdata/errors.out.go
@@ -7,6 +7,8 @@
 //     testdata/errors.go:16:33: unexpected token ), expected end of expression
 //     testdata/errors.go:17:34: unexpected token ), expected end of expression
 //     -: unexpected token INT, expected end of expression
+//     testdata/errors.go:30:35: unexpected number of parameters: expected 2, got 3
+//     testdata/errors.go:31:31: unexpected number of parameters: expected 2, got 1
 
 // Type checking errors after CGo processing:
 //     testdata/errors.go:102: cannot use 2 << 10 (untyped int constant 2048) as C.char value in variable declaration (overflows)
@@ -17,6 +19,8 @@
 //     testdata/errors.go:114: undefined: C.SOME_CONST_b
 //     testdata/errors.go:116: undefined: C.SOME_CONST_startspace
 //     testdata/errors.go:119: undefined: C.SOME_PARAM_CONST_invalid
+//     testdata/errors.go:122: undefined: C.add_toomuch
+//     testdata/errors.go:123: undefined: C.add_toolittle
 
 package main
 