Skip to content

Commit

Permalink
Implement shader resource tables (#1165)
Browse files Browse the repository at this point in the history
* Implement shader resource tables

* fix after rebase + squash

* address some review comments

* fix pipeline_common

* cleanup debug stuff

* switch to using single codegenerator
  • Loading branch information
baggins183 authored Nov 1, 2024
1 parent 7b16085 commit 9ec75c3
Show file tree
Hide file tree
Showing 30 changed files with 740 additions and 119 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/frontend/structured_control_flow.h
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
src/shader_recompiler/ir/passes/ir_passes.h
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
Expand Down
16 changes: 11 additions & 5 deletions src/common/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ DecoderImpl::DecoderImpl() {

DecoderImpl::~DecoderImpl() = default;

std::string DecoderImpl::disassembleInst(ZydisDecodedInstruction& inst,
ZydisDecodedOperand* operands, u64 address) {
const int bufLen = 256;
char szBuffer[bufLen];
ZydisFormatterFormatInstruction(&m_formatter, &inst, operands, inst.operand_count_visible,
szBuffer, sizeof(szBuffer), address, ZYAN_NULL);
return szBuffer;
}

void DecoderImpl::printInstruction(void* code, u64 address) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE];
Expand All @@ -27,11 +36,8 @@ void DecoderImpl::printInstruction(void* code, u64 address) {

void DecoderImpl::printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
u64 address) {
const int bufLen = 256;
char szBuffer[bufLen];
ZydisFormatterFormatInstruction(&m_formatter, &inst, operands, inst.operand_count_visible,
szBuffer, sizeof(szBuffer), address, ZYAN_NULL);
fmt::print("instruction: {}\n", szBuffer);
std::string s = disassembleInst(inst, operands, address);
fmt::print("instruction: {}\n", s);
}

ZyanStatus DecoderImpl::decodeInstruction(ZydisDecodedInstruction& inst,
Expand Down
2 changes: 2 additions & 0 deletions src/common/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ class DecoderImpl {
DecoderImpl();
~DecoderImpl();

std::string disassembleInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
u64 address);
void printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands, u64 address);
void printInstruction(void* code, u64 address);
ZyanStatus decodeInstruction(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
Expand Down
14 changes: 14 additions & 0 deletions src/common/hash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "common/types.h"

[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
return seed ^ (hash + 0x9e3779b9 + (seed << 12) + (seed >> 4));
}

[[nodiscard]] inline u32 HashCombine(const u32 seed, const u32 hash) {
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"

Expand Down Expand Up @@ -146,9 +147,14 @@ void EmitGetGotoVariable(EmitContext&) {
UNREACHABLE_MSG("Unreachable instruction");
}

Id EmitReadConst(EmitContext& ctx) {
return ctx.u32_zero_value;
UNREACHABLE_MSG("Unreachable instruction");
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
u32 flatbuf_off_dw = inst->Flags<u32>();
ASSERT(ctx.srt_flatbuf.binding >= 0);
ASSERT(flatbuf_off_dw > 0);
Id index = ctx.ConstU32(flatbuf_off_dw);
auto& buffer = ctx.srt_flatbuf;
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
return ctx.OpLoad(ctx.U32[1], ptr);
}

Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ void EmitSetVectorRegister(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Expand Down
45 changes: 35 additions & 10 deletions src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
#include "common/assert.h"
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "video_core/amdgpu/types.h"

#include <boost/container/static_vector.hpp>
#include <fmt/format.h>

#include <numbers>
#include <string_view>

namespace Shader::Backend::SPIRV {
namespace {
Expand Down Expand Up @@ -435,14 +437,16 @@ void EmitContext::DefinePushDataBlock() {

void EmitContext::DefineBuffers() {
boost::container::small_vector<Id, 8> type_ids;
const auto define_struct = [&](Id record_array_type, bool is_instance_data) {
const auto define_struct = [&](Id record_array_type, bool is_instance_data,
std::optional<std::string_view> explicit_name = {}) {
const Id struct_type{TypeStruct(record_array_type)};
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
return struct_type;
}
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
: fmt::format("{}_cbuf_block_f32", stage);
auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
: fmt::format("{}_cbuf_block_f32", stage);
name = explicit_name.value_or(name);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
Expand All @@ -451,6 +455,29 @@ void EmitContext::DefineBuffers() {
return struct_type;
};

if (info.has_readconst) {
const Id data_type = U32[1];
const auto storage_class = spv::StorageClass::Uniform;
const Id pointer_type = TypePointer(storage_class, data_type);
const Id record_array_type{
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};

const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};

const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, "srt_flatbuf_ubo");

srt_flatbuf = {
.id = id,
.binding = binding.buffer++,
.pointer_type = pointer_type,
};
interfaces.push_back(id);
}

for (const auto& desc : info.buffers) {
const auto sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(sharp);
Expand All @@ -471,7 +498,7 @@ void EmitContext::DefineBuffers() {
if (is_storage && !desc.is_written) {
Decorate(id, spv::Decoration::NonWritable);
}
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sharp_idx));

buffers.push_back({
.id = id,
Expand All @@ -495,7 +522,7 @@ void EmitContext::DefineTextureBuffers() {
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sharp_idx));
texture_buffers.push_back({
.id = id,
.binding = binding.buffer++,
Expand Down Expand Up @@ -582,7 +609,7 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
}

Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
const auto image = ctx.info.ReadUd<AmdGpu::Image>(desc.sgpr_base, desc.dword_offset);
const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx);
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
const u32 sampled = desc.is_storage ? 2 : 1;
switch (desc.type) {
Expand Down Expand Up @@ -618,8 +645,7 @@ void EmitContext::DefineImagesAndSamplers() {
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
image_desc.dword_offset));
Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx));
images.push_back({
.data_types = &data_types,
.id = id,
Expand All @@ -643,8 +669,7 @@ void EmitContext::DefineImagesAndSamplers() {
const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base,
samp_desc.dword_offset));
Name(id, fmt::format("{}_{}{}", stage, "samp", samp_desc.sharp_idx));
samplers.push_back(id);
interfaces.push_back(id);
}
Expand Down
1 change: 1 addition & 0 deletions src/shader_recompiler/backend/spirv/spirv_emit_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ class EmitContext final : public Sirit::Module {
Bindings& binding;
boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureBufferDefinition, 8> texture_buffers;
BufferDefinition srt_flatbuf;
boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<Id, 4> samplers;

Expand Down
4 changes: 4 additions & 0 deletions src/shader_recompiler/frontend/translate/scalar_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ static constexpr u32 SQ_SRC_LITERAL = 0xFF;
void Translator::EmitScalarMemory(const GcnInst& inst) {
switch (inst.opcode) {
// SMRD
case Opcode::S_LOAD_DWORD:
return S_LOAD_DWORD(1, inst);
case Opcode::S_LOAD_DWORDX2:
return S_LOAD_DWORD(2, inst);
case Opcode::S_LOAD_DWORDX4:
return S_LOAD_DWORD(4, inst);
case Opcode::S_LOAD_DWORDX8:
Expand Down
5 changes: 2 additions & 3 deletions src/shader_recompiler/frontend/translate/translate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
IR::VectorReg dst_reg{attrib.dest_vgpr};

// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
for (u32 i = 0; i < 4; i++) {
const IR::F32 comp = [&] {
switch (buffer.GetSwizzle(i)) {
Expand Down Expand Up @@ -418,8 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32,
.is_instance_data = true,
});
Expand Down
47 changes: 33 additions & 14 deletions src/shader_recompiler/info.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once

#include <algorithm>
#include <span>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/params.h"
Expand All @@ -36,8 +39,7 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
struct Info;

struct BufferResource {
u32 sgpr_base;
u32 dword_offset;
u32 sharp_idx;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
bool is_gds_buffer{};
Expand All @@ -53,8 +55,7 @@ struct BufferResource {
using BufferResourceList = boost::container::small_vector<BufferResource, 16>;

struct TextureBufferResource {
u32 sgpr_base;
u32 dword_offset;
u32 sharp_idx;
AmdGpu::NumberFormat nfmt;
bool is_written{};

Expand All @@ -63,8 +64,7 @@ struct TextureBufferResource {
using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;

struct ImageResource {
u32 sgpr_base;
u32 dword_offset;
u32 sharp_idx;
AmdGpu::ImageType type;
AmdGpu::NumberFormat nfmt;
bool is_storage{};
Expand All @@ -77,8 +77,7 @@ struct ImageResource {
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;

struct SamplerResource {
u32 sgpr_base;
u32 dword_offset;
u32 sharp_idx;
AmdGpu::Sampler inline_sampler{};
u32 associated_image : 4;
u32 disable_aniso : 1;
Expand Down Expand Up @@ -180,6 +179,9 @@ struct Info {
ImageResourceList images;
SamplerResourceList samplers;

PersistentSrtInfo srt_info;
std::vector<u32> flattened_ud_buf;

std::span<const u32> user_data;
Stage stage;

Expand All @@ -199,14 +201,20 @@ struct Info {
bool uses_fp64{};
bool uses_step_rates{};
bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_readconst{};
u8 mrt_mask{0u};

explicit Info(Stage stage_, ShaderParams params)
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
user_data{params.user_data} {}

template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
inline T ReadUdSharp(u32 sharp_idx) const noexcept {
return *reinterpret_cast<const T*>(&flattened_ud_buf[sharp_idx]);
}

template <typename T>
T ReadUdReg(u32 ptr_index, u32 dword_offset) const noexcept {
T data;
const u32* base = user_data.data();
if (ptr_index != IR::NumScalarRegs) {
Expand All @@ -228,7 +236,8 @@ struct Info {
}

void AddBindings(Backend::Bindings& bnd) const {
const auto total_buffers = buffers.size() + texture_buffers.size();
const auto total_buffers =
buffers.size() + texture_buffers.size() + (has_readconst ? 1 : 0);
bnd.buffer += total_buffers;
bnd.unified += total_buffers + images.size() + samplers.size();
bnd.user_data += ud_mask.NumRegs();
Expand All @@ -245,22 +254,32 @@ struct Info {
}
return {vertex_offset, instance_offset};
}

void RefreshFlatBuf() {
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
ASSERT(user_data.size() <= NumUserDataRegs);
std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
// Run the JIT program to walk the SRT and write the leaves to a flat buffer
if (srt_info.walker_func) {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
}
}
};

constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
return inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
}

constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
return info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
}

constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
}

constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
}

} // namespace Shader
Expand Down
4 changes: 4 additions & 0 deletions src/shader_recompiler/ir/basic_block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
} else {
ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
}

if (op == Opcode::ReadConst) {
ret += fmt::format(" (flags={}) ", inst.Flags<u32>());
}
const size_t arg_count{inst.NumArgs()};
for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
const Value arg{inst.Arg(arg_index)};
Expand Down
Loading

0 comments on commit 9ec75c3

Please sign in to comment.