Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CIR] Call code gen; create empty cir.func op #113483

Merged
merged 6 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/CIR/CIRGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class CIRGenerator : public clang::ASTConsumer {
~CIRGenerator() override;
void Initialize(clang::ASTContext &astCtx) override;
bool HandleTopLevelDecl(clang::DeclGroupRef group) override;
mlir::ModuleOp getModule();
erichkeane marked this conversation as resolved.
Show resolved Hide resolved
dkolsen-pgi marked this conversation as resolved.
Show resolved Hide resolved
};

} // namespace cir
Expand Down
21 changes: 21 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIRDialect.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,25 @@
#ifndef LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H
#define LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H

#include "mlir/IR/Builders.h"
erichkeane marked this conversation as resolved.
Show resolved Hide resolved
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

#include "clang/CIR/Dialect/IR/CIROpsDialect.h.inc"

// TableGen'erated files for MLIR dialects require that a macro be defined when
// they are included. GET_OP_CLASSES tells the file to define the classes for
// the operations of that dialect.
#define GET_OP_CLASSES
erichkeane marked this conversation as resolved.
Show resolved Hide resolved
#include "clang/CIR/Dialect/IR/CIROps.h.inc"

#endif // LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H
82 changes: 82 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,86 @@

include "clang/CIR/Dialect/IR/CIRDialect.td"

include "mlir/IR/BuiltinAttributeInterfaces.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/SymbolInterfaces.td"
include "mlir/IR/CommonAttrConstraints.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/Interfaces/MemorySlotInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

//===----------------------------------------------------------------------===//
// CIR Ops
//===----------------------------------------------------------------------===//

// LLVMLoweringInfo is used by cir-tablegen to generate LLVM lowering logic
// automatically for CIR operations. The `llvmOp` field gives the name of the
// LLVM IR dialect operation that the CIR operation will be lowered to. The
// input arguments of the CIR operation will be passed in the same order to the
// lowered LLVM IR operation.
//
// Example:
//
// For the following CIR operation definition:
//
// def FooOp : CIR_Op<"foo"> {
// // ...
// let arguments = (ins CIR_AnyType:$arg1, CIR_AnyType:$arg2);
// let llvmOp = "BarOp";
// }
//
// cir-tablegen will generate LLVM lowering code for the FooOp similar to the
// following:
//
// class CIRFooOpLowering
// : public mlir::OpConversionPattern<mlir::cir::FooOp> {
// public:
// using OpConversionPattern<mlir::cir::FooOp>::OpConversionPattern;
//
// mlir::LogicalResult matchAndRewrite(
// mlir::cir::FooOp op,
// OpAdaptor adaptor,
// mlir::ConversionPatternRewriter &rewriter) const override {
// rewriter.replaceOpWithNewOp<mlir::LLVM::BarOp>(
// op, adaptor.getOperands()[0], adaptor.getOperands()[1]);
// return mlir::success();
// }
// }
//
// If you want fully customized LLVM IR lowering logic, simply exclude the
// `llvmOp` field from your CIR operation definition.
class LLVMLoweringInfo {
dkolsen-pgi marked this conversation as resolved.
Show resolved Hide resolved
string llvmOp = "";
}

class CIR_Op<string mnemonic, list<Trait> traits = []> :
Op<CIR_Dialect, mnemonic, traits>, LLVMLoweringInfo;

//===----------------------------------------------------------------------===//
// FuncOp
//===----------------------------------------------------------------------===//

// TODO(CIR): For starters, cir.func has only name, nothing else. The other
// properties of a function will be added over time as more of ClangIR is
// upstreamed.

def FuncOp : CIR_Op<"func"> {
dkolsen-pgi marked this conversation as resolved.
Show resolved Hide resolved
let summary = "Declare or define a function";
let description = [{
... lots of text to be added later ...
}];

let arguments = (ins SymbolNameAttr:$sym_name);

let skipDefaultBuilders = 1;

let builders = [OpBuilder<(ins "StringRef":$name)>];

let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
}

#endif // LLVM_CLANG_CIR_DIALECT_IR_CIROPS
137 changes: 133 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"

#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Location.h"
Expand All @@ -24,9 +27,135 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &context,
clang::ASTContext &astctx,
erichkeane marked this conversation as resolved.
Show resolved Hide resolved
const clang::CodeGenOptions &cgo,
DiagnosticsEngine &diags)
: astCtx(astctx), langOpts(astctx.getLangOpts()),
theModule{mlir::ModuleOp::create(mlir::UnknownLoc())},
target(astCtx.getTargetInfo()) {}
: builder(&context), astCtx(astctx), langOpts(astctx.getLangOpts()),
theModule{mlir::ModuleOp::create(mlir::UnknownLoc::get(&context))},
diags(diags), target(astCtx.getTargetInfo()) {}

mlir::Location CIRGenModule::getLoc(SourceLocation cLoc) {
assert(cLoc.isValid() && "expected valid source location");
const SourceManager &sm = astCtx.getSourceManager();
PresumedLoc pLoc = sm.getPresumedLoc(cLoc);
StringRef filename = pLoc.getFilename();
return mlir::FileLineColLoc::get(builder.getStringAttr(filename),
pLoc.getLine(), pLoc.getColumn());
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we care about macros/etc here? getLine and getColumn I think end up being the location itself, but there is a lot of work you can do to un-fold macros.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am guessing that the MLIR location type doesn't have any notion of multiple nested locations due to macro expansions. So we can't represent the full complexity of macros here. The outermost location seems like the right one to choose. (I am not familiar with either the Clang location type or the MLIR location type, so I welcome being corrected here.)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll leave this open for Aaron to comment on. IIRC, the source location stored in the AST is going to be the location of the macro, which can make diagnostics awkward/confusing if what happens is inside the expansion, but if MLIR can't comprehend "in expansion of..." then this is perhaps as good as we can do.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this may be okay -- a PresumedLoc is sensitive to #line and GNU line markers, etc, and it's always the expansion point of the wrapped source location. However, MLIR may eventually need to grow to understand the difference between spelling, expansion, and presumed locations.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you ever want to generate diagnostics from CIR, you probably want to just use the clang SourceLocation directly, not try to translate it to an MLIR "Location". It's extremely complicated to describe a location when macros are involved, and the abstraction clang uses really only makes sense for C preprocessor.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't buy the reasoning of "mlir::Location exists, therefore it's the right representation". We're losing information which is likely to be important for many users of CIR. Not saying this is something we need to solve right now, but it will eventually become problematic.

Converting a SourceLocation to line/column is also sort of slow (we compute line numbers lazily), but not sure how slow relative to other stuff CIR lowering needs to do anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't buy the reasoning of "mlir::Location exists, therefore it's the right representation".

mlir::Location is not the right representation because it exists. It's because it is used by all the other MLIR dialects that have nothing to do with C++ and cannot have a dependency on Clang. It is wanting ClangIR to play nicely with the rest of the MLIR ecosystem, when many dialects are mixed together in a single tree later in the compilation chain, that makes using SourceLocation problematic.

Not saying this is something we need to solve right now

Definitely. Using SourceLocation instead of mlir::Location is not something that can be done in this PR. We would need evidence that the lack of SourceLocation and the richness of what it can represent is a serious problem before attempting such a radical change.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is wanting ClangIR to play nicely with the rest of the MLIR ecosystem, when many dialects are mixed together in a single tree later in the compilation chain, that makes using SourceLocation problematic.

However, that's also an internal implementation detail that doesn't need to be exposed to users of the CIR library. Clang works in SourceLocation; if CIR needs to work in something else, that should be abstracted away from the user. Still not saying this needs to be solved as part of this patch, but CIRGenModule::getLoc() are public interfaces currently; can those be made private so we're not leaking an implementation detail of CIR to users?

But later passes won't have access to a SourceLocation.

Two things:

  1. Diagnostics that depend on optimization levels have massive usability issues, so diagnostics from later passes strikes me as a potential design concern to start with; hopefully we don't end up in that situation except in very rare circumstances.
  2. The backend does occasionally need to issue user-facing diagnostics (things like __attribute__((error)) come to mind) and this has been a huge source of pain because LLVM does a pretty bad job of tracking source location with sufficient fidelity for Clang to have good QoI. If MLIR has the same issues, that should be something the MLIR folks work to correct before we transition to CIR as a default.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, that's also an internal implementation detail that doesn't need to be exposed to users of the CIR library.

The primary use of ClangIR is intended to be of the MLIR data structures after CIR code gen has happened. The public interface for ClangIR is entirely based on MLIR, including mlir::Location. Some users will want to use the CIR library to convert from the Clang AST to the ClangIR MLIR dialect, but for many users of ClangIR the fact that the MLIR data structures were generated from the Clang front end and the Clang AST is an implementation detail that they don't need to know about. SourceLocation is more of an implementation detail than mlir::Location is.

Changing CIRGenModule::getLoc() to be private functions would be problematic because those functions will be used by many different classes within the ClangIR code gen code. Those functions really should have module accessibility (similar to package or assembly accessibility in Java or C#), but that's not practical until Clang/LLVM fully moves to C++20 modules.

I am aware and agree that (1) back end diagnostics should be avoided where possible and should be rare, but that (2) when they do happen they can be hard to get right. We struggle with that same problem with NVC++. We should solve the problem of getting good diagnostics during the MLIR passes when we actually run into the problem. It is only then that we will know how serious the problem is, how best to solve it, and how much effort to put into it. Solving that problem now is premature and will miss the mark in some way.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SourceLocation is more of an implementation detail than mlir::Location is.

That depends on perspective, IMO. From our perspective, CIR is the way in which we lower Clang's AST to an IR that eventually ends up in LLVM. The public entrypoints into CIR from Clang's perspective should be using Clang's data structures. If those need to be converted internally into MLIR-specific data structures, that's fine, but that should not leak out into the public interfaces that Clang can interact with.

However, I see now that I missed something important here -- this code is under clang/lib/CIR and not clang/include/clang/CIR, so it isn't in the public interfaces that Clang interacts with, it is (sufficiently) hidden as an implementation detail. Sorry for missing that!

We should solve the problem of getting good diagnostics during the MLIR passes when we actually run into the problem. It is only then that we will know how serious the problem is, how best to solve it, and how much effort to put into it. Solving that problem now is premature and will miss the mark in some way.

The point to these initial PRs is to ensure the community is comfortable with the design and doesn't see any glaring design concerns, so saying "we'll solve it when we get to it" on a situation we know we will hit sort of defeats the purpose. We already know the problem exists because it's one we fight with today and we already know a better way to solve it is to use source location information with at least as much fidelity as Clang is able to handle. Given that one of the benefits to CIR is for better codegen analysis that can lead to improved diagnostics, having this as an early design concern is pretty reasonable IMO. That said, we're not insisting on a change as part of this PR -- just that the CIR folks understand that this is a design concern with the facilities and at some point it may go from "concern" to "required to improve before we can proceed."


mlir::Location CIRGenModule::getLoc(SourceRange cRange) {
assert(cRange.isValid() && "expected a valid source range");
mlir::Location begin = getLoc(cRange.getBegin());
mlir::Location end = getLoc(cRange.getEnd());
mlir::Attribute metadata;
return mlir::FusedLoc::get({begin, end}, metadata, builder.getContext());
}

void CIRGenModule::buildGlobal(clang::GlobalDecl gd) {
const auto *global = cast<ValueDecl>(gd.getDecl());
erichkeane marked this conversation as resolved.
Show resolved Hide resolved

if (const auto *fd = dyn_cast<FunctionDecl>(global)) {
// Update deferred annotations with the latest declaration if the function
// was already used or defined.
if (fd->hasAttr<AnnotateAttr>()) {
errorNYI(fd->getSourceRange(), "defferedAnnotations");
dkolsen-pgi marked this conversation as resolved.
Show resolved Hide resolved
}
if (!fd->doesThisDeclarationHaveABody()) {
erichkeane marked this conversation as resolved.
Show resolved Hide resolved
if (!fd->doesDeclarationForceExternallyVisibleDefinition())
return;

errorNYI(fd->getSourceRange(),
"function declaration that forces code gen");
return;
}
} else {
errorNYI(global->getSourceRange(), "global variable declaration");
}

// TODO(CIR): Defer emitting some global definitions until later
buildGlobalDefinition(gd);
}

void CIRGenModule::buildGlobalFunctionDefinition(clang::GlobalDecl gd,
mlir::Operation *op) {
auto const *funcDecl = cast<FunctionDecl>(gd.getDecl());
auto funcOp = builder.create<mlir::cir::FuncOp>(
erichkeane marked this conversation as resolved.
Show resolved Hide resolved
getLoc(funcDecl->getSourceRange()), funcDecl->getIdentifier()->getName());
dkolsen-pgi marked this conversation as resolved.
Show resolved Hide resolved
theModule.push_back(funcOp);
}

void CIRGenModule::buildGlobalDefinition(clang::GlobalDecl gd,
mlir::Operation *op) {
const auto *decl = cast<ValueDecl>(gd.getDecl());
if (const auto *fd = dyn_cast<FunctionDecl>(decl)) {
// TODO(CIR): Skip generation of CIR for functions with available_externally
// linkage at -O0.

if (const auto *method = dyn_cast<CXXMethodDecl>(decl)) {
// Make sure to emit the definition(s) before we emit the thunks. This is
// necessary for the generation of certain thunks.
(void)method;
errorNYI(method->getSourceRange(), "member function");
return;
}

if (fd->isMultiVersion())
errorNYI(fd->getSourceRange(), "multiversion functions");
buildGlobalFunctionDefinition(gd, op);
return;
}

llvm_unreachable("Invalid argument to CIRGenModule::buildGlobalDefinition");
}

// Emit code for a single top level declaration.
void CIRGenModule::buildTopLevelDecl(Decl *decl) {}
void CIRGenModule::buildTopLevelDecl(Decl *decl) {

// Ignore dependent declarations.
if (decl->isTemplated())
AaronBallman marked this conversation as resolved.
Show resolved Hide resolved
return;

switch (decl->getKind()) {
default:
errorNYI(decl->getBeginLoc(), "declaration of kind",
decl->getDeclKindName());
break;

case Decl::Function: {
auto *fd = cast<FunctionDecl>(decl);
// Consteval functions shouldn't be emitted.
if (!fd->isConsteval())
buildGlobal(fd);
dkolsen-pgi marked this conversation as resolved.
Show resolved Hide resolved
break;
}
}
}

DiagnosticBuilder CIRGenModule::errorNYI(llvm::StringRef feature) {
unsigned diagID = diags.getCustomDiagID(
DiagnosticsEngine::Error, "ClangIR code gen Not Yet Implemented: %0");
return diags.Report(diagID) << feature;
}

DiagnosticBuilder CIRGenModule::errorNYI(SourceLocation loc,
llvm::StringRef feature) {
unsigned diagID = diags.getCustomDiagID(
DiagnosticsEngine::Error, "ClangIR code gen Not Yet Implemented: %0");
return diags.Report(loc, diagID) << feature;
}

DiagnosticBuilder CIRGenModule::errorNYI(SourceLocation loc,
llvm::StringRef feature,
llvm::StringRef name) {
unsigned diagID = diags.getCustomDiagID(
DiagnosticsEngine::Error, "ClangIR code gen Not Yet Implemented: %0: %1");
return diags.Report(loc, diagID) << feature << name;
}

DiagnosticBuilder CIRGenModule::errorNYI(SourceRange loc,
llvm::StringRef feature) {
return errorNYI(loc.getBegin(), feature) << loc;
}

DiagnosticBuilder CIRGenModule::errorNYI(SourceRange loc,
llvm::StringRef feature,
llvm::StringRef name) {
return errorNYI(loc.getBegin(), feature, name) << loc;
}
33 changes: 33 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,21 @@

#include "CIRGenTypeCache.h"

#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "llvm/ADT/StringRef.h"

namespace clang {
class ASTContext;
class CodeGenOptions;
class Decl;
class DiagnosticBuilder;
class DiagnosticsEngine;
class GlobalDecl;
class LangOptions;
class SourceLocation;
class SourceRange;
class TargetInfo;
} // namespace clang

Expand All @@ -44,6 +50,10 @@ class CIRGenModule : public CIRGenTypeCache {
~CIRGenModule() = default;

private:
// TODO(CIR) 'builder' will change to CIRGenBuilderTy once that type is
// defined
mlir::OpBuilder builder;

/// Hold Clang AST information.
clang::ASTContext &astCtx;

Expand All @@ -52,10 +62,33 @@ class CIRGenModule : public CIRGenTypeCache {
/// A "module" matches a c/cpp source file: containing a list of functions.
mlir::ModuleOp theModule;

clang::DiagnosticsEngine &diags;

const clang::TargetInfo &target;

public:
mlir::ModuleOp getModule() const { return theModule; }

/// Helpers to convert Clang's SourceLocation to an MLIR Location.
dkolsen-pgi marked this conversation as resolved.
Show resolved Hide resolved
mlir::Location getLoc(clang::SourceLocation cLoc);
mlir::Location getLoc(clang::SourceRange cRange);

void buildTopLevelDecl(clang::Decl *decl);

/// Emit code for a single global function or variable declaration. Forward
/// declarations are emitted lazily.
void buildGlobal(clang::GlobalDecl gd);

void buildGlobalDefinition(clang::GlobalDecl gd,
mlir::Operation *op = nullptr);
void buildGlobalFunctionDefinition(clang::GlobalDecl gd, mlir::Operation *op);

/// Helpers to emit "not yet implemented" error diagnostics
DiagnosticBuilder errorNYI(llvm::StringRef);
DiagnosticBuilder errorNYI(SourceLocation, llvm::StringRef);
DiagnosticBuilder errorNYI(SourceLocation, llvm::StringRef, llvm::StringRef);
DiagnosticBuilder errorNYI(SourceRange, llvm::StringRef);
DiagnosticBuilder errorNYI(SourceRange, llvm::StringRef, llvm::StringRef);
};
} // namespace cir

Expand Down
10 changes: 9 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@

#include "CIRGenModule.h"

#include "mlir/IR/MLIRContext.h"

#include "clang/AST/DeclGroup.h"
#include "clang/CIR/CIRGenerator.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"

using namespace cir;
using namespace clang;
Expand All @@ -31,9 +34,14 @@ void CIRGenerator::Initialize(ASTContext &astCtx) {

this->astCtx = &astCtx;

cgm = std::make_unique<CIRGenModule>(*mlirCtx, astCtx, codeGenOpts, diags);
mlirCtx = std::make_unique<mlir::MLIRContext>();
mlirCtx->getOrLoadDialect<mlir::cir::CIRDialect>();
lanza marked this conversation as resolved.
Show resolved Hide resolved
cgm = std::make_unique<CIRGenModule>(*mlirCtx.get(), astCtx, codeGenOpts,
diags);
}

mlir::ModuleOp CIRGenerator::getModule() { return cgm->getModule(); }

bool CIRGenerator::HandleTopLevelDecl(DeclGroupRef group) {

for (Decl *decl : group)
Expand Down
Loading
Loading