Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Image subresources barriers #904

Merged
merged 16 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/renderer_vulkan/vk_master_semaphore.h
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
src/video_core/renderer_vulkan/vk_pipeline_cache.h
src/video_core/renderer_vulkan/vk_pipeline_common.cpp
src/video_core/renderer_vulkan/vk_pipeline_common.h
src/video_core/renderer_vulkan/vk_platform.cpp
src/video_core/renderer_vulkan/vk_platform.h
src/video_core/renderer_vulkan/vk_rasterizer.cpp
Expand Down
7 changes: 5 additions & 2 deletions src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
ImageOperands operands;
operands.AddOffset(ctx, offset);
operands.Add(spv::ImageOperandsMask::Lod, lod);
return ctx.OpBitcast(
ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands));
const Id texel =
texture.is_storage
? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need casts here that match the return type in opcodes.inc like it did before
i.e. vec4 for fetch, uvec4 for read

Copy link
Contributor

@baggins183 baggins183 Sep 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

didnt see the next line. You do f32[4] for fetch but need to do uvec4 when its read, otherwise spirv-val errors

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you encountered any actual validation errors there? Because to me, it looks okay:

        %432 = OpCompositeConstruct %u32vec2_id %u32_id_0 %u32_id_0
        %433 = OpLoad %53 %cs_img12_18
        %434 = OpImageRead %f32vec4_id %433 %432 None
        %435 = OpBitcast %f32vec4_id %434

: ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands);
return ctx.OpBitcast(ctx.F32[4], texel);
}

Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) {
Expand Down
4 changes: 3 additions & 1 deletion src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
case AmdGpu::ImageType::Color3D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube:
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
format);
default:
break;
}
Expand All @@ -534,6 +535,7 @@ void EmitContext::DefineImagesAndSamplers() {
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type,
.image_type = image_type,
.is_storage = image_desc.is_storage,
});
interfaces.push_back(id);
++binding;
Expand Down
5 changes: 3 additions & 2 deletions src/shader_recompiler/backend/spirv/spirv_emit_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ class EmitContext final : public Sirit::Module {
Id sampled_type;
Id pointer_type;
Id image_type;
bool is_storage = false;
};

struct BufferDefinition {
Expand All @@ -216,8 +217,8 @@ class EmitContext final : public Sirit::Module {
u32 binding;
Id image_type;
Id result_type;
bool is_integer;
bool is_storage;
bool is_integer = false;
bool is_storage = false;
};

u32& binding;
Expand Down
1 change: 1 addition & 0 deletions src/shader_recompiler/frontend/decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,7 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) {

m_instruction.control.mimg = *reinterpret_cast<InstControlMIMG*>(&hexInstruction);
m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode);
ASSERT(m_instruction.control.mimg.r128 == 0);
}

void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) {
Expand Down
3 changes: 3 additions & 0 deletions src/shader_recompiler/frontend/translate/export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ void Translator::EmitExport(const GcnInst& inst) {
ir.SetAttribute(attrib, comp, swizzle(i));
}
}
if (IR::IsMrt(attrib)) {
info.mrt_mask |= 1u << u8(attrib);
}
}

} // namespace Shader::Gcn
2 changes: 2 additions & 0 deletions src/shader_recompiler/frontend/translate/vector_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
info.has_offset.Assign(flags.test(MimgModifier::Offset));
info.explicit_lod.Assign(explicit_lod);
info.has_derivatives.Assign(has_derivatives);
info.is_array.Assign(mimg.da);

// Issue IR instruction, leaving unknown fields blank to patch later.
const IR::Value texel = [&]() -> IR::Value {
Expand Down Expand Up @@ -630,6 +631,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
info.has_offset.Assign(flags.test(MimgModifier::Offset));
// info.explicit_lod.Assign(explicit_lod);
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
info.is_array.Assign(mimg.da);

// Issue IR instruction, leaving unknown fields blank to patch later.
const IR::Value texel = [&]() -> IR::Value {
Expand Down
6 changes: 4 additions & 2 deletions src/shader_recompiler/info.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ struct ImageResource {
u32 dword_offset;
AmdGpu::ImageType type;
AmdGpu::NumberFormat nfmt;
bool is_storage;
bool is_depth;
bool is_storage{};
bool is_depth{};
bool is_atomic{};
bool is_array{};

constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
Expand Down Expand Up @@ -171,6 +172,7 @@ struct Info {
bool uses_fp64{};
bool uses_step_rates{};
bool translation_failed{}; // indicates that shader has unsupported instructions
u8 mrt_mask{0u};

explicit Info(Stage stage_, ShaderParams params)
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
Expand Down
27 changes: 21 additions & 6 deletions src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,10 @@ class Descriptors {
u32 Add(const ImageResource& desc) {
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
return desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset && desc.type == existing.type &&
desc.is_storage == existing.is_storage;
desc.dword_offset == existing.dword_offset;
})};
auto& image = image_resources[index];
image.is_storage |= desc.is_storage;
return index;
}

Expand Down Expand Up @@ -441,18 +442,29 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}

IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
const IR::Value& z, bool is_storage) {
const IR::Value& z, bool is_storage, bool is_array) {
// When cubemap is written with imageStore it is treated like 2DArray.
if (is_storage) {
return ir.CompositeConstruct(s, t, z);
}

ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below

// We need to fix x and y coordinate,
// because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
// We already force the scale value to be 1.0 when handling v_cubema_f32,
// here we subtract 1.5 to recover the original value.
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
return ir.CompositeConstruct(x, y, z);
if (is_array) {
const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z});
const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u));
const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u));
return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id),
ir.ConvertIToF(32, 32, false, slice_id));
} else {
return ir.CompositeConstruct(x, y, z);
}
}

void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
Expand Down Expand Up @@ -481,14 +493,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
}
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
const bool is_storage = IsImageStorageInstruction(inst);
const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
u32 image_binding = descriptors.Add(ImageResource{
.sgpr_base = tsharp.sgpr_base,
.dword_offset = tsharp.dword_offset,
.type = image.GetType(),
.type = type,
.nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
.is_storage = is_storage,
.is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst),
.is_array = bool(inst_info.is_array),
});

// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
Expand Down Expand Up @@ -545,7 +559,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
case AmdGpu::ImageType::Color3D: // x, y, z
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
case AmdGpu::ImageType::Cube: // x, y, face
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage),
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage,
inst_info.is_array),
body->Arg(3)};
default:
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
Expand Down
1 change: 1 addition & 0 deletions src/shader_recompiler/ir/reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ union TextureInstInfo {
BitField<5, 1, u32> has_offset;
BitField<6, 2, u32> gather_comp;
BitField<8, 1, u32> has_derivatives;
BitField<9, 1, u32> is_array;
};

union BufferInstInfo {
Expand Down
3 changes: 2 additions & 1 deletion src/shader_recompiler/specialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct StageSpecialization {
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetType();
spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
: sharp.GetType();
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
});
}
Expand Down
14 changes: 12 additions & 2 deletions src/video_core/amdgpu/resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,10 +238,15 @@ struct Image {
return pitch + 1;
}

u32 NumLayers() const {
u32 NumLayers(bool is_array) const {
u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1;
if (GetType() == ImageType::Cube) {
slices *= 6;
if (is_array) {
slices = last_array + 1;
ASSERT(slices % 6 == 0);
} else {
slices = 6;
}
}
if (pow2pad) {
slices = std::bit_ceil(slices);
Expand Down Expand Up @@ -282,6 +287,11 @@ struct Image {
bool IsTiled() const {
return GetTilingMode() != TilingMode::Display_Linear;
}

bool IsPartialCubemap() const {
const auto viewed_slice = last_array - base_array + 1;
return GetType() == ImageType::Cube && viewed_slice < 6;
}
};
static_assert(sizeof(Image) == 32); // 256bits

Expand Down
12 changes: 10 additions & 2 deletions src/video_core/buffer_cache/buffer_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,15 +581,23 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
return false;
}
Image& image = texture_cache.GetImage(image_id);
ASSERT_MSG(device_addr == image.info.guest_address,
"Texel buffer aliases image subresources {:x} : {:x}", device_addr,
image.info.guest_address);
boost::container::small_vector<vk::BufferImageCopy, 8> copies;
u32 offset = buffer.Offset(image.cpu_addr);
const u32 num_layers = image.info.resources.layers;
u32 total_size = 0;
for (u32 m = 0; m < image.info.resources.levels; m++) {
const u32 width = std::max(image.info.size.width >> m, 1u);
const u32 height = std::max(image.info.size.height >> m, 1u);
const u32 depth =
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
offset += mip_ofs * num_layers;
if (offset + (mip_size * num_layers) > buffer.SizeBytes()) {
break;
}
copies.push_back({
.bufferOffset = offset,
.bufferRowLength = static_cast<u32>(mip_pitch),
Expand All @@ -603,11 +611,11 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
.imageOffset = {0, 0, 0},
.imageExtent = {width, height, depth},
});
offset += mip_ofs * num_layers;
total_size += mip_size * num_layers;
}
if (!copies.empty()) {
scheduler.EndRendering();
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
copies);
Expand Down
10 changes: 10 additions & 0 deletions src/video_core/renderer_vulkan/liverpool_to_vk.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#pragma once

#include <span>
#include "common/assert.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/resource.h"
Expand Down Expand Up @@ -55,4 +56,13 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags support

void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);

static inline vk::Format PromoteFormatToDepth(vk::Format fmt) {
if (fmt == vk::Format::eR32Sfloat) {
return vk::Format::eD32Sfloat;
} else if (fmt == vk::Format::eR16Unorm) {
return vk::Format::eD16Unorm;
}
UNREACHABLE();
}

} // namespace Vulkan::LiverpoolToVK
10 changes: 6 additions & 4 deletions src/video_core/renderer_vulkan/renderer_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();

image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf);
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {},
cmdbuf);

const std::array pre_barrier{
vk::ImageMemoryBarrier{
Expand All @@ -228,7 +229,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop

// Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
cmdbuf.blitImage(
image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
image.image, image.last_state.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),
vk::Filter::eLinear);

Expand Down Expand Up @@ -269,6 +270,9 @@ void RendererVulkan::Present(Frame* frame) {

auto& scheduler = present_scheduler;
const auto cmdbuf = scheduler.CommandBuffer();

ImGui::Core::Render(cmdbuf, frame);

{
auto* profiler_ctx = instance.GetProfilerContext();
TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame",
Expand Down Expand Up @@ -326,8 +330,6 @@ void RendererVulkan::Present(Frame* frame) {
},
};

ImGui::Core::Render(cmdbuf, frame);

cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
Expand Down
Loading
Loading