Skip to content

Commit

Permalink
Pix mesh shader output instrumentation (microsoft#2709)
Browse files Browse the repository at this point in the history
This is a pass for PIX that adds instructions to write mesh shader output (vertices and indices) to a UAV for later ingestion by PIX in order to present a view of that output.
  • Loading branch information
jeffnn authored Feb 21, 2020
1 parent 6eb0e07 commit eb33030
Show file tree
Hide file tree
Showing 6 changed files with 346 additions and 0 deletions.
2 changes: 2 additions & 0 deletions include/dxc/DxilPIXPasses/DxilPIXPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ ModulePass *createDxilAddPixelHitInstrumentationPass();
ModulePass *createDxilDbgValueToDbgDeclarePass();
ModulePass *createDxilAnnotateWithVirtualRegisterPass();
ModulePass *createDxilOutputColorBecomesConstantPass();
ModulePass *createDxilDxilPIXMeshShaderOutputInstrumentation();
ModulePass *createDxilRemoveDiscardsPass();
ModulePass *createDxilReduceMSAAToSingleSamplePass();
ModulePass *createDxilForceEarlyZPass();
Expand All @@ -29,6 +30,7 @@ void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry&);
void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry&);
void initializeDxilAnnotateWithVirtualRegisterPass(llvm::PassRegistry&);
void initializeDxilOutputColorBecomesConstantPass(llvm::PassRegistry&);
void initializeDxilPIXMeshShaderOutputInstrumentationPass(llvm::PassRegistry &);
void initializeDxilRemoveDiscardsPass(llvm::PassRegistry&);
void initializeDxilReduceMSAAToSingleSamplePass(llvm::PassRegistry&);
void initializeDxilForceEarlyZPass(llvm::PassRegistry&);
Expand Down
1 change: 1 addition & 0 deletions lib/DxilPIXPasses/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ add_llvm_library(LLVMDxilPIXPasses
DxilDebugInstrumentation.cpp
DxilForceEarlyZ.cpp
DxilOutputColorBecomesConstant.cpp
DxilPIXMeshShaderOutputInstrumentation.cpp
DxilRemoveDiscards.cpp
DxilReduceMSAAToSingleSample.cpp
DxilShaderAccessTracking.cpp
Expand Down
336 changes: 336 additions & 0 deletions lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,336 @@
///////////////////////////////////////////////////////////////////////////////
// //
// DxilPIXMeshShaderOutputInstrumentation.cpp //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// Provides a pass to add instrumentation to retrieve mesh shader output. //
// Used by PIX. //
// //
///////////////////////////////////////////////////////////////////////////////

#include "dxc/DXIL/DxilOperations.h"
#include "dxc/DXIL/DxilUtil.h"

#include "dxc/DXIL/DxilInstructions.h"
#include "dxc/DXIL/DxilModule.h"
#include "dxc/DxilPIXPasses/DxilPIXPasses.h"
#include "dxc/HLSL/DxilGenerationPass.h"
#include "dxc/HLSL/DxilSpanAllocator.h"

#include "llvm/IR/PassManager.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>

#ifdef _WIN32
#include <winerror.h>
#endif

// Number of bytes that UAVDumpingGroundOffset() subtracts from the configured
// UAV size; the resulting byte offset is where reserveDebugEntrySpace() places
// its atomic counter.
// Keep this in sync with the same-named value in the debugger application's
// WinPixShaderUtils.h
constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;

// Record-type tags. runOnModule() writes exactly one of these as the first
// dword of each record it emits: triangleIndexIndicator for EmitIndices calls,
// int32ValueIndicator for i32 StoreVertexOutput calls and floatValueIndicator
// for float StoreVertexOutput calls.
// Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
constexpr uint32_t triangleIndexIndicator = 1;
constexpr uint32_t int32ValueIndicator = 2;
constexpr uint32_t floatValueIndicator = 3;

using namespace llvm;
using namespace hlsl;

// Module pass used by PIX: for every EmitIndices and (i32/float)
// StoreVertexOutput call in the module it inserts instructions that copy the
// call's operands, tagged with the thread group id, into a tool-owned UAV.
class DxilPIXMeshShaderOutputInstrumentation : public ModulePass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {}

  const char *getPassName() const override {
    return "DXIL mesh shader output instrumentation";
  }

  // Reads the "UAVSize" pass option into m_UAVSize.
  void applyOptions(PassOptions O) override;

  // Adds the UAV and all instrumentation; always reports the module as
  // modified.
  bool runOnModule(Module &M) override;

private:
  // Bundle of the objects every emission helper needs. The field order is
  // relied upon by the brace-initialisations in runOnModule().
  struct BuilderContext {
    Module &M;
    DxilModule &DM;
    LLVMContext &Ctx;
    OP *HlslOP;
    IRBuilder<> &Builder;
  };

  // Registers the output UAV with the DxilModule and emits its CreateHandle.
  CallInst *addUAV(BuilderContext &BC);

  // Emits GroupId(0) and GroupId(1) and packs them as (Y << 16) + X.
  Value *insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC);

  // Emits GroupId(2).
  Value *insertInstructionsToCalculateGroupIdZ(BuilderContext &BC);

  // Emits the atomic add that claims SpaceInBytes bytes of the UAV and
  // returns the masked byte offset at which the caller may start writing.
  Value *reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);

  // Byte offset of the atomic counter: m_UAVSize minus
  // DebugBufferDumpingGroundSize.
  uint32_t UAVDumpingGroundOffset();

  // Emits a one-dword BufferStore at TheOffset and returns TheOffset + 4.
  Value *writeDwordAndReturnNewOffset(BuilderContext &BC, Value *TheOffset,
                                      Value *TheValue);

  // Reserves one dword per argument and writes the arguments in order.
  template <typename... T> void Instrument(BuilderContext &BC, T... values);

  // Handle produced by addUAV(); assigned in runOnModule().
  CallInst *m_OutputUAV = nullptr;

  // Bytes reserved by reserveDebugEntrySpace() that have not yet been
  // consumed by writeDwordAndReturnNewOffset(); only checked by asserts.
  int m_RemainingReservedSpaceInBytes = 0;

  // Constant (UAVDumpingGroundOffset() - 1), assigned in runOnModule() and
  // ANDed onto every reserved offset.
  Constant *m_OffsetMask = nullptr;

  // Size of the output UAV in bytes; overridable through the "UAVSize" option.
  uint64_t m_UAVSize = 1024 * 1024;
};

// Pulls the "UAVSize" option out of the pass options into m_UAVSize, passing
// one megabyte (1024 * 1024) as the default value.
void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) {
  const uint64_t DefaultUAVSizeInBytes = 1024 * 1024;
  GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, DefaultUAVSizeInBytes);
}

// Returns m_UAVSize minus DebugBufferDumpingGroundSize, truncated to 32 bits.
// reserveDebugEntrySpace() uses the result as the byte offset of its atomic
// counter, and runOnModule() derives the offset mask from it.
uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() {
  const uint64_t OffsetInBytes = m_UAVSize - DebugBufferDumpingGroundSize;
  return static_cast<uint32_t>(OffsetInBytes);
}

// Registers a new read/write raw-buffer UAV with the DxilModule and emits, at
// BC.Builder's insertion point, the CreateHandle call that yields its handle.
// Also turns on the raw-and-structured-buffers shader flag.
// Returns the CreateHandle call instruction.
CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
  // The new resource is appended, so its id is the current UAV count.
  const unsigned int ExpectedID =
      static_cast<unsigned int>(BC.DM.GetUAVs().size());

  // Element type of the resource: a struct holding a single i32.
  SmallVector<llvm::Type *, 1> StructMembers{Type::getInt32Ty(BC.Ctx)};
  llvm::StructType *DebugUAVTy =
      llvm::StructType::create(StructMembers, "PIX_DebugUAV_Type");

  std::unique_ptr<DxilResource> Resource = llvm::make_unique<DxilResource>();
  Resource->SetGlobalName("PIX_DebugUAVName");
  Resource->SetGlobalSymbol(UndefValue::get(DebugUAVTy->getPointerTo()));
  Resource->SetID(ExpectedID);
  // This is the reserved-for-tools register space
  Resource->SetSpaceID(static_cast<unsigned int>(-2));
  Resource->SetSampleCount(1);
  Resource->SetGloballyCoherent(false);
  Resource->SetHasCounter(false);
  Resource->SetCompType(CompType::getI32());
  Resource->SetLowerBound(0);
  Resource->SetRangeSize(1);
  Resource->SetKind(DXIL::ResourceKind::RawBuffer);
  Resource->SetRW(true);

  auto ID = BC.DM.AddUAV(std::move(Resource));
  assert(ID == ExpectedID);

  BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);

  // Create handle for the newly-added UAV
  Function *CreateHandleFn =
      BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));

  Constant *OpcodeArg =
      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
  Constant *ResourceClassArg = BC.HlslOP->GetI8Const(
      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
          DXIL::ResourceClass::UAV));
  // position of the metadata record in the corresponding metadata list
  Constant *MetadataIndexArg = BC.HlslOP->GetU32Const(ID);
  // index argument, always 0 here
  Constant *IndexArg = BC.HlslOP->GetU32Const(0);
  // non-uniform resource index: false
  Constant *NonUniformArg = BC.HlslOP->GetI1Const(0);

  return BC.Builder.CreateCall(CreateHandleFn,
                               {OpcodeArg, ResourceClassArg, MetadataIndexArg,
                                IndexArg, NonUniformArg},
                               "PIX_DebugUAV_Handle");
}

// Emits GroupId(0) and GroupId(1) at BC.Builder's insertion point and returns
// a single i32 computed as (GroupIdY << 16) + GroupIdX.
Value *DxilPIXMeshShaderOutputInstrumentation::
    insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC) {
  Function *GroupIdFn =
      BC.HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
  Constant *GroupIdOpcode =
      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GroupId);
  Constant *ComponentX = BC.HlslOP->GetU32Const(0);
  Constant *ComponentY = BC.HlslOP->GetU32Const(1);

  // X is emitted before Y, matching the order of the resulting instructions.
  CallInst *X =
      BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentX}, "GroupIdX");
  CallInst *Y =
      BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentY}, "GroupIdY");

  // Spec requires that no group id index is greater than 64k, so X and Y can
  // share one 32-bit value: Y in the upper half, X in the lower half.
  Value *YInUpperHalf = BC.Builder.CreateShl(Y, 16);
  return BC.Builder.CreateAdd(YInUpperHalf, X);
}

// Emits a GroupId call for component 2 at BC.Builder's insertion point and
// returns the call (named "GroupIdZ").
Value *DxilPIXMeshShaderOutputInstrumentation::
    insertInstructionsToCalculateGroupIdZ(BuilderContext &BC) {
  Constant *ComponentZ = BC.HlslOP->GetU32Const(2);
  Function *GroupIdFn =
      BC.HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
  Constant *GroupIdOpcode =
      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GroupId);

  CallInst *Z =
      BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentZ}, "GroupIdZ");
  return Z;
}

// Emits an AtomicBinOp(Add) on the output UAV that adds SpaceInBytes to the
// dword at byte offset UAVDumpingGroundOffset(), then ANDs the call's result
// with m_OffsetMask.
//
// BC           - builder context; instructions go to BC.Builder's insertion
//                point.
// SpaceInBytes - number of bytes the caller is about to write; also recorded
//                in m_RemainingReservedSpaceInBytes for the bookkeeping
//                asserts.
// Returns the masked value ("MaskedForUAVLimit"), which callers use as the
// byte offset of their first store.
Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
    BuilderContext &BC, uint32_t SpaceInBytes) {
  // or else the previous caller reserved too much space
  assert(m_RemainingReservedSpaceInBytes == 0);

  m_RemainingReservedSpaceInBytes = SpaceInBytes;

  // Insert the UAV increment instruction:
  Function *AtomicBinOpFn =
      BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));

  Constant *OpcodeArg =
      BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  Constant *AddOperation =
      BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  Constant *CounterByteOffset =
      BC.HlslOP->GetU32Const(UAVDumpingGroundOffset());
  UndefValue *UnusedCoord = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  Constant *BytesToClaim = BC.HlslOP->GetU32Const(SpaceInBytes);

  // Operand layout of the AtomicBinOp call:
  //   i32              opcode
  //   %dx.types.Handle resource handle
  //   i32              binary operation code : EXCHANGE, IADD, AND, OR, XOR,
  //                    IMIN, IMAX, UMIN, UMAX
  //   i32              coordinate c0: index in bytes
  //   i32              coordinate c1 (unused)
  //   i32              coordinate c2 (unused)
  //   i32              increment value
  CallInst *CounterBeforeAdd = BC.Builder.CreateCall(
      AtomicBinOpFn,
      {OpcodeArg, m_OutputUAV, AddOperation, CounterByteOffset, UnusedCoord,
       UnusedCoord, BytesToClaim},
      "UAVIncResult");

  return BC.Builder.CreateAnd(CounterBeforeAdd, m_OffsetMask,
                              "MaskedForUAVLimit");
}

// Emits a BufferStore that writes TheValue into the output UAV at byte offset
// TheOffset using write mask 1 (only the first value component), and consumes
// four bytes of the current reservation.
//
// BC        - builder context; instructions go to BC.Builder's insertion
//             point.
// TheOffset - i32 byte offset into the UAV.
// TheValue  - value to store.
// Returns a new i32 value equal to TheOffset + sizeof(uint32_t).
Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
    BuilderContext &BC, Value *TheOffset, Value *TheValue) {

  Function *BufferStoreFn =
      BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx));

  Constant *OpcodeArg =
      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
  UndefValue *Unused32 = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  Constant *MaskXOnly = BC.HlslOP->GetI8Const(1);

  // Operand layout of the BufferStore call:
  //   i32              opcode
  //   %dx.types.Handle resource handle
  //   i32              c0: index in bytes into UAV
  //   i32              c1: unused
  //   value0           the dword being written
  //   value1..value3   unused
  //   i8               write mask
  (void)BC.Builder.CreateCall(BufferStoreFn,
                              {OpcodeArg, m_OutputUAV, TheOffset, Unused32,
                               TheValue, Unused32, Unused32, Unused32,
                               MaskXOnly});

  m_RemainingReservedSpaceInBytes -= sizeof(uint32_t);
  // or else the caller didn't reserve enough space
  assert(m_RemainingReservedSpaceInBytes >= 0);

  Constant *DwordSize =
      BC.HlslOP->GetU32Const(static_cast<unsigned int>(sizeof(uint32_t)));
  return BC.Builder.CreateAdd(TheOffset, DwordSize);
}

// Emits one record into the output UAV: reserves four bytes per argument with
// reserveDebugEntrySpace(), then stores the arguments one after another, in
// the order given, with writeDwordAndReturnNewOffset().
//
// BC     - builder context; instructions go to BC.Builder's insertion point.
// values - pointers convertible to llvm::Value*, one per dword of the record.
template <typename... T>
void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC,
                                                        T... values) {
  llvm::SmallVector<llvm::Value *, 10> Dwords(
      {static_cast<llvm::Value *>(values)...});

  const uint32_t RecordSizeInBytes =
      static_cast<uint32_t>(Dwords.size() * sizeof(uint32_t));
  llvm::Value *WriteCursor = reserveDebugEntrySpace(BC, RecordSizeInBytes);

  for (auto It = Dwords.begin(), End = Dwords.end(); It != End; ++It) {
    WriteCursor = writeDwordAndReturnNewOffset(BC, WriteCursor, *It);
  }
}

// Pass entry point.
//
// At the first non-alloca insertion point of the entry function it emits the
// output UAV handle and the group id values. Then, immediately before every
// call to EmitIndices, StoreVertexOutput(i32) and StoreVertexOutput(float), it
// emits a record made of: a record-type tag, the packed X/Y group id, the Z
// group id, and the call's operands 1..4 (EmitIndices) or 1..5
// (StoreVertexOutput). Finally the DXIL resource metadata is re-emitted.
//
// Always returns true.
bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
  DxilModule &DM = M.GetOrCreateDxilModule();
  LLVMContext &Ctx = M.getContext();
  OP *HlslOP = DM.GetOP();

  // Shader prologue: UAV handle plus the group id values shared by all records.
  IRBuilder<> PrologueBuilder(
      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
  BuilderContext PrologueBC{M, DM, Ctx, HlslOP, PrologueBuilder};

  m_OffsetMask = HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
  m_OutputUAV = addUAV(PrologueBC);

  Value *GroupIdXandY =
      insertInstructionsToCalculateFlattenedGroupIdXandY(PrologueBC);
  Value *GroupIdZ = insertInstructionsToCalculateGroupIdZ(PrologueBC);

  // EmitIndices: record operands 1..4 unchanged.
  {
    Function *EmitIndicesFn =
        HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx));
    auto Uses = EmitIndicesFn->uses();
    for (auto It = Uses.begin(); It != Uses.end();) {
      // Step the iterator before touching the IR around this use.
      auto &CurrentUse = *It++;
      CallInst *EmitCall = cast<CallInst>(CurrentUse.getUser());

      IRBuilder<> CallSiteBuilder(EmitCall);
      BuilderContext CallSiteBC{M, DM, Ctx, HlslOP, CallSiteBuilder};

      Instrument(CallSiteBC, HlslOP->GetI32Const(triangleIndexIndicator),
                 GroupIdXandY, GroupIdZ, EmitCall->getOperand(1),
                 EmitCall->getOperand(2), EmitCall->getOperand(3),
                 EmitCall->getOperand(4));
    }
  }

  // StoreVertexOutput: operand 3 is zero-extended to i32 and, for the float
  // overload, operand 4 is bit-cast to i32 so that every record field is a
  // dword.
  auto InstrumentVertexStores = [&](Type *OverloadTy, uint32_t Indicator,
                                    bool BitcastStoredValue) {
    Function *StoreFn =
        HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, OverloadTy);
    auto Uses = StoreFn->uses();
    for (auto It = Uses.begin(); It != Uses.end();) {
      auto &CurrentUse = *It++;
      CallInst *StoreCall = cast<CallInst>(CurrentUse.getUser());

      IRBuilder<> CallSiteBuilder(StoreCall);
      BuilderContext CallSiteBC{M, DM, Ctx, HlslOP, CallSiteBuilder};

      Value *WidenedOperand3 = CallSiteBuilder.CreateCast(
          Instruction::ZExt, StoreCall->getOperand(3), Type::getInt32Ty(Ctx));

      Value *StoredValue = StoreCall->getOperand(4);
      if (BitcastStoredValue) {
        StoredValue = CallSiteBuilder.CreateCast(
            Instruction::BitCast, StoredValue, Type::getInt32Ty(Ctx));
      }

      this->Instrument(CallSiteBC, HlslOP->GetI32Const(Indicator),
                       GroupIdXandY, GroupIdZ, StoreCall->getOperand(1),
                       StoreCall->getOperand(2), WidenedOperand3, StoredValue,
                       StoreCall->getOperand(5));
    }
  };

  InstrumentVertexStores(Type::getInt32Ty(Ctx), int32ValueIndicator,
                         /*BitcastStoredValue=*/false);
  InstrumentVertexStores(Type::getFloatTy(Ctx), floatValueIndicator,
                         /*BitcastStoredValue=*/true);

  DM.ReEmitDxilResources();

  return true;
}

// Out-of-class definition of the static pass ID declared in the class; the
// constructor passes it to the ModulePass base.
char DxilPIXMeshShaderOutputInstrumentation::ID = 0;

// Factory declared in DxilPIXPasses.h: heap-allocates a new instance of the
// mesh shader output instrumentation pass and returns it as a ModulePass.
// NOTE(review): the caller presumably takes ownership of the returned
// pointer - confirm against the pass manager that consumes it.
ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation() {
  ModulePass *NewPass = new DxilPIXMeshShaderOutputInstrumentation();
  return NewPass;
}

// Registers the pass under the command-line name
// "hlsl-dxil-pix-meshshader-output-instrumentation" with the description
// string below.
// NOTE(review): this macro presumably generates
// initializeDxilPIXMeshShaderOutputInstrumentationPass(), which
// SetupRegistryPassForPIX() calls, and the two trailing `false` arguments are
// presumably its CFG-only and is-analysis flags - confirm against
// llvm/PassSupport.h.
INITIALIZE_PASS(DxilPIXMeshShaderOutputInstrumentation,
"hlsl-dxil-pix-meshshader-output-instrumentation",
"DXIL mesh shader output instrumentation for PIX", false, false)
1 change: 1 addition & 0 deletions lib/DxilPIXPasses/DxilPIXPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ HRESULT SetupRegistryPassForPIX() {
initializeDxilDebugInstrumentationPass(Registry);
initializeDxilForceEarlyZPass(Registry);
initializeDxilOutputColorBecomesConstantPass(Registry);
initializeDxilPIXMeshShaderOutputInstrumentationPass(Registry);
initializeDxilReduceMSAAToSingleSamplePass(Registry);
initializeDxilRemoveDiscardsPass(Registry);
initializeDxilShaderAccessTrackingPass(Registry);
Expand Down
Loading

0 comments on commit eb33030

Please sign in to comment.