From e387c22199ff6828c98ada2201f7ee9842b8d797 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 11 Sep 2024 22:29:06 +0200 Subject: [PATCH 01/16] video_core: texture: image subresources state tracking --- src/video_core/buffer_cache/buffer_cache.cpp | 2 +- .../renderer_vulkan/renderer_vulkan.cpp | 10 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/image.cpp | 157 +++++++++++++----- src/video_core/texture_cache/image.h | 21 ++- .../texture_cache/texture_cache.cpp | 23 ++- 8 files changed, 159 insertions(+), 62 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 4530f690e8..4fa7c6bdb4 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -607,7 +607,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, } if (!copies.empty()) { scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d019ff0346..d7954bf79e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -202,7 +202,8 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, 
vk::AccessFlagBits2::eTransferRead, {}, + cmdbuf); const std::array pre_barrier{ vk::ImageMemoryBarrier{ @@ -228,7 +229,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. cmdbuf.blitImage( - image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, + image.image, image.last_state.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height), vk::Filter::eLinear); @@ -269,6 +270,9 @@ void RendererVulkan::Present(Frame* frame) { auto& scheduler = present_scheduler; const auto cmdbuf = scheduler.CommandBuffer(); + + ImGui::Core::Render(cmdbuf, frame); + { auto* profiler_ctx = instance.GetProfilerContext(); TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame", @@ -326,8 +330,6 @@ void RendererVulkan::Present(Frame* frame) { }, }; - ImGui::Core::Render(cmdbuf, frame); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index d9296b5014..54f3549690 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -220,7 +220,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, + image.last_state.layout); } else if 
(instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dc311a7c69..7a851386a5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -453,7 +453,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, + image.last_state.layout); } else if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 23f60da13f..9d8b425236 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -240,7 +240,7 @@ void Rasterizer::BeginRendering() { state.depth_image = image.image; state.depth_attachment = { .imageView = *image_view.image_view, - .imageLayout = image.layout, + .imageLayout = image.last_state.layout, .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = is_clear ? 
vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore, .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 9e8c38f0de..bec11ea1d9 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #define VULKAN_HPP_NO_EXCEPTIONS +#include #include "common/assert.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -179,52 +180,128 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, info.guest_size_bytes); } -void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::CommandBuffer cmdbuf) { - if (dst_layout == layout && dst_mask == access_mask) { - return; +boost::container::small_vector Image::GetBarriers( + vk::ImageLayout dst_layout, vk::Flags dst_mask, + vk::PipelineStageFlags2 dst_stage, std::optional subres_range) { + const bool needs_partial_transition = + subres_range && + (subres_range->base != SubresourceBase{} || subres_range->extent != info.resources); + const bool partially_transited = !subresource_states.empty(); + + boost::container::small_vector barriers{}; + if (needs_partial_transition || partially_transited) { + if (!partially_transited) { + subresource_states.resize(info.resources.levels * info.resources.layers); + std::fill(subresource_states.begin(), subresource_states.end(), last_state); + } + + // In case of partial transition, we need to change the specified subresources only. + // Otherwise all subresources need to be set to the same state so we can use a full + // resource transition for the next time. + const auto mips = + needs_partial_transition + ? 
std::ranges::views::iota(subres_range->base.level, subres_range->base.level + subres_range->extent.levels) + : std::views::iota(0u, info.resources.levels); + const auto layers = + needs_partial_transition // iota takes an end bound, while extent holds a count + ? std::ranges::views::iota(subres_range->base.layer, subres_range->base.layer + subres_range->extent.layers) + : std::views::iota(0u, info.resources.layers); + + for (u32 mip : mips) { + for (u32 layer : layers) { + // NOTE: these loops may produce a lot of small barriers. + // If this becomes a problem, we can optimize it by merging adjacent barriers. + auto& state = subresource_states[mip * info.resources.layers + layer]; + + if (state.layout != dst_layout || state.access_mask != dst_mask) { + barriers.emplace_back(vk::ImageMemoryBarrier2{ + .srcStageMask = state.pl_stage, + .srcAccessMask = state.access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_mask, + .oldLayout = state.layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = mip, + .levelCount = 1, + .baseArrayLayer = layer, + .layerCount = 1, + }, + }); + state.layout = dst_layout; + state.access_mask = dst_mask; + state.pl_stage = dst_stage; + } + } + } + + if (!needs_partial_transition) { + subresource_states.clear(); + } + } else { // Full resource transition + if (last_state.layout == dst_layout && last_state.access_mask == dst_mask) { + return {}; + } + + barriers.emplace_back(vk::ImageMemoryBarrier2{ + .srcStageMask = last_state.pl_stage, + .srcAccessMask = last_state.access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_mask, + .oldLayout = last_state.layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = 
VK_REMAINING_ARRAY_LAYERS, + }, + }); } - const vk::ImageMemoryBarrier barrier = { - .srcAccessMask = access_mask, - .dstAccessMask = dst_mask, - .oldLayout = layout, - .newLayout = dst_layout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; + last_state.layout = dst_layout; + last_state.access_mask = dst_mask; + last_state.pl_stage = dst_stage; + return barriers; +} + +void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, + std::optional range, vk::CommandBuffer cmdbuf /*= {}*/) { // Adjust pipieline stage - const vk::PipelineStageFlags dst_pl_stage = - (dst_mask == vk::AccessFlagBits::eTransferRead || - dst_mask == vk::AccessFlagBits::eTransferWrite) - ? vk::PipelineStageFlagBits::eTransfer - : vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader; + const vk::PipelineStageFlags2 dst_pl_stage = + (dst_mask == vk::AccessFlagBits2::eTransferRead || + dst_mask == vk::AccessFlagBits2::eTransferWrite) + ? vk::PipelineStageFlagBits2::eTransfer + : vk::PipelineStageFlagBits2::eAllGraphics | vk::PipelineStageFlagBits2::eComputeShader; + + const auto barriers = GetBarriers(dst_layout, dst_mask, dst_pl_stage, range); + if (barriers.empty()) { + return; + } if (!cmdbuf) { // When using external cmdbuf you are responsible for ending rp. 
scheduler->EndRendering(); cmdbuf = scheduler->CommandBuffer(); } - cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {}, - barrier); - - layout = dst_layout; - access_mask = dst_mask; - pl_stage = dst_pl_stage; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); } void Image::Upload(vk::Buffer buffer, u64 offset) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); // Copy to the image. const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil @@ -248,12 +325,12 @@ void Image::Upload(vk::Buffer buffer, u64 offset) { cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } void Image::CopyImage(const Image& image) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); @@ -279,15 +356,16 @@ void Image::CopyImage(const Image& image) { .extent = {mip_w, mip_h, mip_d}, }); } - cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout, + image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } void Image::CopyMip(const Image& image, u32 mip) { scheduler->EndRendering(); - 
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); @@ -313,10 +391,11 @@ void Image::CopyMip(const Image& image, u32 mip) { }, .extent = {mip_w, mip_h, mip_d}, }; - cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout, + image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } Image::~Image() = default; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 1bbb975bac..e644b81c06 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -91,8 +91,11 @@ struct Image { return image_view_ids[std::distance(image_view_infos.begin(), it)]; } - void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::CommandBuffer cmdbuf = {}); + boost::container::small_vector GetBarriers( + vk::ImageLayout dst_layout, vk::Flags dst_mask, + vk::PipelineStageFlags2 dst_stage, std::optional subres_range); + void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, + std::optional range, vk::CommandBuffer cmdbuf = {}); void Upload(vk::Buffer buffer, u64 offset); void CopyImage(const Image& image); @@ -111,10 +114,16 @@ struct Image { // Resource state tracking vk::ImageUsageFlags usage; - vk::Flags pl_stage = vk::PipelineStageFlagBits::eAllCommands; - vk::Flags access_mask = vk::AccessFlagBits::eNone; - vk::ImageLayout layout = vk::ImageLayout::eUndefined; - boost::container::small_vector mip_hashes; + struct State { + u32 mip_level{}; + u32 layer{}; + vk::Flags pl_stage = vk::PipelineStageFlagBits2::eAllCommands; + vk::Flags access_mask = vk::AccessFlagBits2::eNone; + 
vk::ImageLayout layout = vk::ImageLayout::eUndefined; + }; + State last_state{}; + std::vector subresource_states{}; + boost::container::small_vector mip_hashes{}; u64 tick_accessed_last{0}; }; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8621e95f5f..f7b89fac92 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -154,7 +154,7 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_ if (tex_cache_image.info.IsMipOf(image_info)) { tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, - vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eTransferRead, {}); const auto num_mips_to_copy = tex_cache_image.info.resources.levels; ASSERT(num_mips_to_copy == 1); @@ -176,7 +176,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { auto& src_image = slot_images[image_id]; auto& new_image = slot_images[new_image_id]; - src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); new_image.CopyImage(src_image); FreeImage(image_id); @@ -263,13 +263,14 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& if (view_info.is_storage) { image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite, + view_info.range); usage.storage = true; } else { const auto new_layout = image.info.IsDepthStencil() ? 
vk::ImageLayout::eDepthStencilReadOnlyOptimal : vk::ImageLayout::eShaderReadOnlyOptimal; - image.Transit(new_layout, vk::AccessFlagBits::eShaderRead); + image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range); usage.texture = true; } @@ -284,8 +285,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, UpdateImage(image_id); image.Transit(vk::ImageLayout::eColorAttachmentOptimal, - vk::AccessFlagBits::eColorAttachmentWrite | - vk::AccessFlagBits::eColorAttachmentRead); + vk::AccessFlagBits2::eColorAttachmentWrite | + vk::AccessFlagBits2::eColorAttachmentRead, + view_info.range); // Register meta data for this color buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { @@ -330,8 +332,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, : vk::ImageLayout::eDepthAttachmentOptimal : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal : vk::ImageLayout::eDepthReadOnlyOptimal; - image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite | - vk::AccessFlagBits::eDepthStencilAttachmentRead); + image.Transit(new_layout, + vk::AccessFlagBits2::eDepthStencilAttachmentWrite | + vk::AccessFlagBits2::eDepthStencilAttachmentRead, + view_info.range); // Register meta data for this depth buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { @@ -404,7 +408,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule sched_ptr->EndRendering(); const auto cmdbuf = sched_ptr->CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}, + cmdbuf); const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; From 3d2ea32dae96c1d60deec96f63dd18645820273a Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 12 Sep 2024 22:47:54 +0200 Subject: [PATCH 02/16] shader_recompiler: 
use one binding if the same image is read and written --- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 7 +++++-- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 1 + src/shader_recompiler/backend/spirv/spirv_emit_context.h | 5 +++-- src/shader_recompiler/ir/passes/resource_tracking_pass.cpp | 5 +++-- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 530f381d77..1944db07fd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -157,8 +157,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const ImageOperands operands; operands.AddOffset(ctx, offset); operands.Add(spv::ImageOperandsMask::Lod, lod); - return ctx.OpBitcast( - ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands)); + const Id texel = + texture.is_storage + ? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands) + : ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands); + return ctx.OpBitcast(ctx.F32[4], texel); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 2f1f7aa753..7f70aee7c0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -534,6 +534,7 @@ void EmitContext::DefineImagesAndSamplers() { .sampled_type = image_desc.is_storage ? 
sampled_type : TypeSampledImage(image_type), .pointer_type = pointer_type, .image_type = image_type, + .is_storage = image_desc.is_storage, }); interfaces.push_back(id); ++binding; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0908b7f828..1a968390e0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -200,6 +200,7 @@ class EmitContext final : public Sirit::Module { Id sampled_type; Id pointer_type; Id image_type; + bool is_storage = false; }; struct BufferDefinition { @@ -216,8 +217,8 @@ class EmitContext final : public Sirit::Module { u32 binding; Id image_type; Id result_type; - bool is_integer; - bool is_storage; + bool is_integer = false; + bool is_storage = false; }; u32& binding; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index a56c9f4970..141bf20fb4 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -200,9 +200,10 @@ class Descriptors { u32 Add(const ImageResource& desc) { const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { return desc.sgpr_base == existing.sgpr_base && - desc.dword_offset == existing.dword_offset && desc.type == existing.type && - desc.is_storage == existing.is_storage; + desc.dword_offset == existing.dword_offset; })}; + auto& image = image_resources[index]; + image.is_storage |= desc.is_storage; return index; } From e1a7c737d8a9b67971611ebfb509aa9effcbbe69 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 13 Sep 2024 22:26:42 +0200 Subject: [PATCH 03/16] video_core: added rebinding of changed textures after overlap resolve --- CMakeLists.txt | 2 + .../renderer_vulkan/vk_compute_pipeline.cpp | 33 +------- .../renderer_vulkan/vk_compute_pipeline.h | 21 ++--- 
.../renderer_vulkan/vk_graphics_pipeline.cpp | 36 +-------- .../renderer_vulkan/vk_graphics_pipeline.h | 27 ++----- .../renderer_vulkan/vk_pipeline_common.cpp | 78 +++++++++++++++++++ .../renderer_vulkan/vk_pipeline_common.h | 47 +++++++++++ src/video_core/texture_cache/image.h | 2 + .../texture_cache/texture_cache.cpp | 4 + 9 files changed, 150 insertions(+), 100 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_common.cpp create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_common.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b334f5940e..1980b36894 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -579,6 +579,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_master_semaphore.h src/video_core/renderer_vulkan/vk_pipeline_cache.cpp src/video_core/renderer_vulkan/vk_pipeline_cache.h + src/video_core/renderer_vulkan/vk_pipeline_common.cpp + src/video_core/renderer_vulkan/vk_pipeline_common.h src/video_core/renderer_vulkan/vk_platform.cpp src/video_core/renderer_vulkan/vk_platform.h src/video_core/renderer_vulkan/vk_rasterizer.cpp diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 54f3549690..e06443ada1 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + #include "common/alignment.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" @@ -15,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache, u64 compute_key_, const Shader::Info& info_, vk::ShaderModule module) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, compute_key{compute_key_}, + : Pipeline{instance_, scheduler_, desc_heap_, 
pipeline_cache}, compute_key{compute_key_}, info{&info_} { const vk::PipelineShaderStageCreateInfo shader_ci = { .stage = vk::ShaderStageFlagBits::eCompute, @@ -213,36 +214,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, }); } - for (const auto& image_desc : info->images) { - const auto tsharp = image_desc.GetSharp(*info); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - const auto& image_view = texture_cache.FindTexture(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, - image.last_state.layout); - } else if (instance.IsNullDescriptorSupported()) { - image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, - vk::ImageLayout::eGeneral); - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = image_desc.is_storage ? 
vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), - }); + BindTextures(texture_cache, *info, binding, set_writes); - if (texture_cache.IsMeta(tsharp.Address())) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)"); - } - } for (const auto& sampler : info->samplers) { const auto ssharp = sampler.GetSharp(*info); if (ssharp.force_degamma) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8a6213a29c..f1bc7285a9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -3,9 +3,8 @@ #pragma once -#include -#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" namespace VideoCore { class BufferCache; @@ -18,27 +17,17 @@ class Instance; class Scheduler; class DescriptorHeap; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(const Instance& instance, Scheduler& scheduler, - DescriptorHeap& desc_heap, vk::PipelineCache pipeline_cache, - u64 compute_key, const Shader::Info& info, vk::ShaderModule module); + ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + vk::PipelineCache pipeline_cache, u64 compute_key, const Shader::Info& info, + vk::ShaderModule module); ~ComputePipeline(); - [[nodiscard]] vk::Pipeline Handle() const noexcept { - return *pipeline; - } - bool BindResources(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; private: - const Instance& instance; - Scheduler& scheduler; - DescriptorHeap& desc_heap; - vk::UniquePipeline pipeline; - vk::UniquePipelineLayout pipeline_layout; - vk::UniqueDescriptorSetLayout desc_layout; u64 compute_key; const Shader::Info* info; bool uses_push_descriptors{}; diff --git 
a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7a851386a5..4f6dc2e3eb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -21,7 +21,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul vk::PipelineCache pipeline_cache, std::span infos, std::span modules) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, key{key_} { + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} { const vk::Device device = instance.GetDevice(); std::ranges::copy(infos, stages.begin()); BuildDescSetLayout(); @@ -444,45 +444,15 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, }); } - boost::container::static_vector tsharps; - for (const auto& image_desc : stage->images) { - const auto tsharp = image_desc.GetSharp(*stage); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - tsharps.emplace_back(tsharp); - VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - const auto& image_view = texture_cache.FindTexture(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, - image.last_state.layout); - } else if (instance.IsNullDescriptorSupported()) { - image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, - vk::ImageLayout::eGeneral); - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = image_desc.is_storage ? 
vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), - }); + BindTextures(texture_cache, *stage, binding, set_writes); - if (texture_cache.IsMeta(tsharp.Address())) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); - } - } for (const auto& sampler : stage->samplers) { auto ssharp = sampler.GetSharp(*stage); if (ssharp.force_degamma) { LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); } if (sampler.disable_aniso) { - const auto& tsharp = tsharps[sampler.associated_image]; + const auto& tsharp = stage->images[sampler.associated_image].GetSharp(*stage); if (tsharp.base_level == 0 && tsharp.last_level == 0) { ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index c8a08b4f26..77d6a0adbf 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,7 +5,7 @@ #include "common/types.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" -#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" namespace VideoCore { class BufferCache; @@ -50,26 +50,17 @@ struct GraphicsPipelineKey { } }; -class GraphicsPipeline { +class GraphicsPipeline : public Pipeline { public: - explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, - DescriptorHeap& desc_heap, const GraphicsPipelineKey& key, - vk::PipelineCache pipeline_cache, - std::span stages, - std::span modules); + GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, + std::span stages, + std::span modules); ~GraphicsPipeline(); void BindResources(const Liverpool::Regs& regs, 
VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; - vk::Pipeline Handle() const noexcept { - return *pipeline; - } - - vk::PipelineLayout GetLayout() const { - return *pipeline_layout; - } - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { return *stages[u32(stage)]; } @@ -91,12 +82,6 @@ class GraphicsPipeline { void BuildDescSetLayout(); private: - const Instance& instance; - Scheduler& scheduler; - DescriptorHeap& desc_heap; - vk::UniquePipeline pipeline; - vk::UniquePipelineLayout pipeline_layout; - vk::UniqueDescriptorSetLayout desc_layout; std::array stages{}; GraphicsPipelineKey key; bool uses_push_descriptors{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp new file mode 100644 index 0000000000..6a3de7b61b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace Vulkan { + +Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, + vk::PipelineCache pipeline_cache) + : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_} {} + +Pipeline::~Pipeline() = default; + +void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, + u32& binding, DescriptorWrites& set_writes) const { + static boost::container::static_vector image_infos; + image_infos.clear(); + + using ImageBindingInfo = std::tuple; + boost::container::static_vector image_bindings; + + for (const auto& image_desc : stage.images) { + const auto tsharp = image_desc.GetSharp(stage); + if 
(tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; + VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; + auto& image_view = texture_cache.FindTexture(image_info, view_info); + auto& image = texture_cache.GetImage(image_view.image_id); + image.flags |= VideoCore::ImageFlagBits::Bound; + image_bindings.emplace_back(&image, &image_view, image_desc.is_storage); + } else { + image_bindings.emplace_back(nullptr, nullptr, image_desc.is_storage); + } + + if (texture_cache.IsMeta(tsharp.Address())) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); + } + } + + // Second pass to re-bind images that were updated after binding + for (auto& [image, image_view, is_storage] : image_bindings) { + if (!image || !image_view) { + if (instance.IsNullDescriptorSupported()) { + image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); + } else { + auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); + image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, + vk::ImageLayout::eGeneral); + } + } else { + vk::ImageLayout layout = image->last_state.layout; + if (True(image->flags & VideoCore::ImageFlagBits::NeedsRebind)) { + image_view = &texture_cache.FindTexture(image->info, image_view->info); + layout = texture_cache.GetImage(image_view->image_id).last_state.layout; + } + image_infos.emplace_back(VK_NULL_HANDLE, *image_view->image_view, layout); + image->flags &= + ~(VideoCore::ImageFlagBits::NeedsRebind | VideoCore::ImageFlagBits::Bound); + } + + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = + is_storage ? 
vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, + .pImageInfo = &image_infos.back(), + }); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h new file mode 100644 index 0000000000..90fe3afceb --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore { +class BufferCache; +class TextureCache; +} // namespace VideoCore + +namespace Vulkan { + +class Instance; +class Scheduler; +class DescriptorHeap; + +class Pipeline { +public: + Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + vk::PipelineCache pipeline_cache); + virtual ~Pipeline(); + + vk::Pipeline Handle() const noexcept { + return *pipeline; + } + + vk::PipelineLayout GetLayout() const noexcept { + return *pipeline_layout; + } + + using DescriptorWrites = boost::container::small_vector; + void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, + u32& binding, DescriptorWrites& set_writes) const; + +protected: + const Instance& instance; + Scheduler& scheduler; + DescriptorHeap& desc_heap; + vk::UniquePipeline pipeline; + vk::UniquePipelineLayout pipeline_layout; + vk::UniqueDescriptorSetLayout desc_layout; +}; + +} // namespace Vulkan diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index e644b81c06..acdd2e6322 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -32,6 +32,8 @@ enum ImageFlagBits : u32 { Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata 
for this surface is known and registered + Bound = 1 << 9, ///< True when the image is bound to a descriptor set + NeedsRebind = 1 << 10, ///< True when the image needs to be rebound }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index f7b89fac92..449157353c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -179,6 +179,10 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); new_image.CopyImage(src_image); + if (True(src_image.flags & ImageFlagBits::Bound)) { + src_image.flags |= ImageFlagBits::NeedsRebind; + } + FreeImage(image_id); TrackImage(new_image_id); From 6cce6599b6e4b57d835eec70f037e98cf751edb0 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 14 Sep 2024 00:09:05 +0200 Subject: [PATCH 04/16] don't use pointers; slight `FindTexture` refactoring --- .../renderer_vulkan/vk_pipeline_common.cpp | 29 ++++++++++--------- .../texture_cache/texture_cache.cpp | 3 +- src/video_core/texture_cache/texture_cache.h | 5 ++-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 6a3de7b61b..e161c0c44f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -21,20 +21,19 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader static boost::container::static_vector image_infos; image_infos.clear(); - using ImageBindingInfo = std::tuple; + using ImageBindingInfo = std::tuple; boost::container::static_vector image_bindings; for (const auto& image_desc : stage.images) { const auto tsharp = image_desc.GetSharp(stage); if (tsharp.GetDataFmt() != 
AmdGpu::DataFormat::FormatInvalid) { VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - auto& image_view = texture_cache.FindTexture(image_info, view_info); - auto& image = texture_cache.GetImage(image_view.image_id); + const auto image_id = texture_cache.FindImage(image_info); + auto& image = texture_cache.GetImage(image_id); image.flags |= VideoCore::ImageFlagBits::Bound; - image_bindings.emplace_back(&image, &image_view, image_desc.is_storage); + image_bindings.emplace_back(image_id, tsharp, image_desc.is_storage); } else { - image_bindings.emplace_back(nullptr, nullptr, image_desc.is_storage); + image_bindings.emplace_back(VideoCore::ImageId{}, tsharp, image_desc.is_storage); } if (texture_cache.IsMeta(tsharp.Address())) { @@ -43,8 +42,8 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader } // Second pass to re-bind images that were updated after binding - for (auto& [image, image_view, is_storage] : image_bindings) { - if (!image || !image_view) { + for (auto [image_id, tsharp, is_storage] : image_bindings) { + if (!image_id) { if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { @@ -53,13 +52,15 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader vk::ImageLayout::eGeneral); } } else { - vk::ImageLayout layout = image->last_state.layout; - if (True(image->flags & VideoCore::ImageFlagBits::NeedsRebind)) { - image_view = &texture_cache.FindTexture(image->info, image_view->info); - layout = texture_cache.GetImage(image_view->image_id).last_state.layout; + auto& image = texture_cache.GetImage(image_id); + if (True(image.flags & VideoCore::ImageFlagBits::NeedsRebind)) { + image_id = texture_cache.FindImage(image.info); } - image_infos.emplace_back(VK_NULL_HANDLE, *image_view->image_view, layout); - image->flags &= + 
VideoCore::ImageViewInfo view_info{tsharp, is_storage}; + auto& image_view = texture_cache.FindTexture(image_id, view_info); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, + texture_cache.GetImage(image_id).last_state.layout); + image.flags &= ~(VideoCore::ImageFlagBits::NeedsRebind | VideoCore::ImageFlagBits::Bound); } diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 449157353c..ef402ecc40 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -259,8 +259,7 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(info); +ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { Image& image = slot_images[image_id]; UpdateImage(image_id); auto& usage = image.info.usage; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 329128a3ca..b2a8f13f4e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -59,9 +59,8 @@ class TextureCache { /// Retrieves the image handle of the image with the provided attributes. [[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {}); - /// Retrieves an image view with the properties of the specified image descriptor. - [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, - const ImageViewInfo& view_info); + /// Retrieves an image view with the properties of the specified image id. 
+ [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); /// Retrieves the render target with specified properties [[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info, From 673ff8d74a742da41206a8458d831a04a832b9bd Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 14 Sep 2024 20:26:47 +0200 Subject: [PATCH 05/16] video_core: buffer_cache: don't copy over the image size --- src/video_core/buffer_cache/buffer_cache.cpp | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 4fa7c6bdb4..7815abab4d 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -581,15 +581,22 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, return false; } Image& image = texture_cache.GetImage(image_id); + ASSERT_MSG(buffer.CpuAddr() == image.info.guest_address, + "Texel buffer aliases image subresources {} : {}", buffer.CpuAddr(), + image.info.guest_address); boost::container::small_vector copies; u32 offset = buffer.Offset(image.cpu_addr); const u32 num_layers = image.info.resources.layers; + u32 total_size = 0; for (u32 m = 0; m < image.info.resources.levels; m++) { const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = image.info.props.is_volume ? 
std::max(image.info.size.depth >> m, 1u) : 1u; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + if (total_size + mip_size > buffer.SizeBytes()) { + break; + } copies.push_back({ .bufferOffset = offset, .bufferRowLength = static_cast(mip_pitch), @@ -604,14 +611,14 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, .imageExtent = {width, height, depth}, }); offset += mip_ofs * num_layers; + total_size += mip_size; } - if (!copies.empty()) { - scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, - copies); - } + ASSERT(!copies.empty()); // If triggered, need to find which layers fit + scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + copies); return true; } From 9a5ada4138c71330a725af5154662d9dc16244ec Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 16 Sep 2024 22:53:35 +0200 Subject: [PATCH 06/16] redundant barriers removed; fixes --- src/video_core/buffer_cache/buffer_cache.cpp | 10 ++-- .../renderer_vulkan/vk_scheduler.cpp | 52 ------------------- src/video_core/texture_cache/image.h | 2 - 3 files changed, 5 insertions(+), 59 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 7815abab4d..a3474052fd 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -581,8 +581,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, return false; } Image& image = texture_cache.GetImage(image_id); - ASSERT_MSG(buffer.CpuAddr() == 
image.info.guest_address, - "Texel buffer aliases image subresources {} : {}", buffer.CpuAddr(), + ASSERT_MSG(device_addr == image.info.guest_address, + "Texel buffer aliases image subresources {:x} : {:x}", device_addr, image.info.guest_address); boost::container::small_vector copies; u32 offset = buffer.Offset(image.cpu_addr); @@ -594,7 +594,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, const u32 depth = image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - if (total_size + mip_size > buffer.SizeBytes()) { + offset += mip_ofs * num_layers; + if (offset + (mip_size * num_layers) > buffer.SizeBytes()) { break; } copies.push_back({ @@ -610,8 +611,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, .imageOffset = {0, 0, 0}, .imageExtent = {width, height, depth}, }); - offset += mip_ofs * num_layers; - total_size += mip_size; + total_size += mip_size * num_layers; } ASSERT(!copies.empty()); // If triggered, need to find which layers fit scheduler.EndRendering(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index b99dfdbb46..08b5014ec1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -59,58 +59,6 @@ void Scheduler::EndRendering() { } is_rendering = false; current_cmdbuf.endRendering(); - - boost::container::static_vector barriers; - for (size_t i = 0; i < render_state.num_color_attachments; ++i) { - barriers.push_back(vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, - .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, - .newLayout = vk::ImageLayout::eColorAttachmentOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - 
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = render_state.color_images[i], - .subresourceRange = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - } - if (render_state.has_depth || render_state.has_stencil) { - barriers.push_back(vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, - .oldLayout = render_state.depth_attachment.imageLayout, - .newLayout = render_state.depth_attachment.imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = render_state.depth_image, - .subresourceRange = - { - .aspectMask = vk::ImageAspectFlagBits::eDepth | - (render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil - : vk::ImageAspectFlagBits::eNone), - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - } - - if (!barriers.empty()) { - const auto src_stages = - vk::PipelineStageFlagBits::eColorAttachmentOutput | - (render_state.has_depth ? 
vk::PipelineStageFlagBits::eLateFragmentTests | - vk::PipelineStageFlagBits::eEarlyFragmentTests - : vk::PipelineStageFlagBits::eNone); - current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader, - vk::DependencyFlagBits::eByRegion, {}, {}, barriers); - } } void Scheduler::Flush(SubmitInfo& info) { diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index acdd2e6322..02a48b6a18 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -117,8 +117,6 @@ struct Image { // Resource state tracking vk::ImageUsageFlags usage; struct State { - u32 mip_level{}; - u32 layer{}; vk::Flags pl_stage = vk::PipelineStageFlagBits2::eAllCommands; vk::Flags access_mask = vk::AccessFlagBits2::eNone; vk::ImageLayout layout = vk::ImageLayout::eUndefined; From d0e6ce6918ed87b4eea7ef986cc4690da8daab38 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 16 Sep 2024 23:00:11 +0200 Subject: [PATCH 07/16] regression fixes --- src/video_core/buffer_cache/buffer_cache.cpp | 13 ++++++----- .../renderer_vulkan/vk_compute_pipeline.cpp | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 ++++++++++--------- .../renderer_vulkan/vk_pipeline_common.cpp | 4 ++-- .../renderer_vulkan/vk_pipeline_common.h | 1 + 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index a3474052fd..ac168f18ee 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -613,12 +613,13 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, }); total_size += mip_size * num_layers; } - ASSERT(!copies.empty()); // If triggered, need to find which layers fit - scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - const auto cmdbuf = scheduler.CommandBuffer(); - 
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, - copies); + if (!copies.empty()) { + scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + copies); + } return true; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index e06443ada1..e10b704897 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -109,12 +109,13 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, // Bind resource buffers and textures. boost::container::static_vector buffer_views; boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; boost::container::small_vector set_writes; boost::container::small_vector buffer_barriers; Shader::PushData push_data{}; u32 binding{}; + image_infos.clear(); + for (const auto& desc : info->buffers) { bool is_storage = true; if (desc.is_gds_buffer) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4f6dc2e3eb..5fa995b466 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -41,8 +41,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }; pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); - boost::container::static_vector bindings; - boost::container::static_vector attributes; + boost::container::static_vector vertex_bindings; + boost::container::static_vector vertex_attributes; const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; for (const auto& input : vs_info->vs_inputs) { 
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || @@ -52,13 +52,13 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } const auto buffer = vs_info->ReadUd(input.sgpr_base, input.dword_offset); - attributes.push_back({ + vertex_attributes.push_back({ .location = input.binding, .binding = input.binding, .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .offset = 0, }); - bindings.push_back({ + vertex_bindings.push_back({ .binding = input.binding, .stride = buffer.GetStride(), .inputRate = input.instance_step_rate == Shader::Info::VsInput::None @@ -68,10 +68,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = static_cast(bindings.size()), - .pVertexBindingDescriptions = bindings.data(), - .vertexAttributeDescriptionCount = static_cast(attributes.size()), - .pVertexAttributeDescriptions = attributes.data(), + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), }; if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) { @@ -291,8 +291,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul GraphicsPipeline::~GraphicsPipeline() = default; void GraphicsPipeline::BuildDescSetLayout() { - u32 binding{}; boost::container::small_vector bindings; + u32 binding{}; + for (const auto* stage : stages) { if (!stage) { continue; @@ -352,12 +353,13 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, // Bind resource buffers and textures. 
boost::container::static_vector buffer_views; boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; boost::container::small_vector set_writes; boost::container::small_vector buffer_barriers; Shader::PushData push_data{}; u32 binding{}; + image_infos.clear(); + for (const auto* stage : stages) { if (!stage) { continue; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index e161c0c44f..50df7aecd7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -10,6 +10,8 @@ namespace Vulkan { +boost::container::static_vector Pipeline::image_infos; + Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache) : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_} {} @@ -18,8 +20,6 @@ Pipeline::~Pipeline() = default; void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, u32& binding, DescriptorWrites& set_writes) const { - static boost::container::static_vector image_infos; - image_infos.clear(); using ImageBindingInfo = std::tuple; boost::container::static_vector image_bindings; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 90fe3afceb..627ce389a7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -42,6 +42,7 @@ class Pipeline { vk::UniquePipeline pipeline; vk::UniquePipelineLayout pipeline_layout; vk::UniqueDescriptorSetLayout desc_layout; + static boost::container::static_vector image_infos; }; } // namespace Vulkan From 0bf3678b6d00f659ee5426744ea2363b38202d7b Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 16 Sep 2024 00:18:18 +0200 Subject: [PATCH 08/16] texture_cache: 3d texture layers count fixup --- 
src/video_core/texture_cache/image_view.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index f94c1a37b6..1e308ccec8 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -79,6 +79,10 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexc range.base.layer = image.base_array; range.extent.levels = image.last_level + 1; range.extent.layers = image.last_array + 1; + if (type == vk::ImageViewType::e3D && range.extent.layers > 1) { + // Some games pass incorrect layer count for 3D textures so we need to fixup it + range.extent.layers = 1; + } if (!is_storage) { mapping.r = ConvertComponentSwizzle(image.dst_sel_x); mapping.g = ConvertComponentSwizzle(image.dst_sel_y); From 223a7e4eef5282ad1e7f5b08f68b20110d55241f Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 16 Sep 2024 23:01:22 +0200 Subject: [PATCH 09/16] shader_recompiler: support for partially bound cubemaps --- .../ir/passes/resource_tracking_pass.cpp | 3 ++- src/shader_recompiler/specialization.h | 3 ++- src/video_core/amdgpu/resource.h | 7 ++++++- src/video_core/texture_cache/image_view.cpp | 11 ++++++++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 141bf20fb4..d8519a8ff3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -482,10 +482,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_storage = IsImageStorageInstruction(inst); + const auto type = image.IsPartialCubemap() ? 
AmdGpu::ImageType::Color2DArray : image.GetType(); u32 image_binding = descriptors.Add(ImageResource{ .sgpr_base = tsharp.sgpr_base, .dword_offset = tsharp.dword_offset, - .type = image.GetType(), + .type = type, .nfmt = static_cast(image.GetNumberFmt()), .is_storage = is_storage, .is_depth = bool(inst_info.is_depth), diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index bbcafdb862..e95559d057 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -62,7 +62,8 @@ struct StageSpecialization { }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { - spec.type = sharp.GetType(); + spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray + : sharp.GetType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); }); } diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 41dbe801db..2b6fc829e4 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -241,7 +241,7 @@ struct Image { u32 NumLayers() const { u32 slices = GetType() == ImageType::Color3D ? 
1 : depth + 1; if (GetType() == ImageType::Cube) { - slices *= 6; + slices = std::max(last_array + 1, 6); } if (pow2pad) { slices = std::bit_ceil(slices); @@ -282,6 +282,11 @@ struct Image { bool IsTiled() const { return GetTilingMode() != TilingMode::Display_Linear; } + + bool IsPartialCubemap() const { + const auto viewed_slice = last_array - base_array + 1; + return GetType() == ImageType::Cube && viewed_slice < 6; + } }; static_assert(sizeof(Image) == 32); // 256bits diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 1e308ccec8..87ecd3b1e9 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -68,7 +68,6 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept : is_storage{is_storage_} { - type = ConvertImageViewType(image.GetType()); const auto dfmt = image.GetDataFmt(); auto nfmt = image.GetNumberFmt(); if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) { @@ -79,10 +78,20 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexc range.base.layer = image.base_array; range.extent.levels = image.last_level + 1; range.extent.layers = image.last_array + 1; + type = ConvertImageViewType(image.GetType()); + + // Adjust view type for partial cubemaps and arrays + if (image.IsPartialCubemap()) { + type = vk::ImageViewType::e2DArray; + } + if (type == vk::ImageViewType::eCube && range.extent.layers > 6) { + type = vk::ImageViewType::eCubeArray; + } if (type == vk::ImageViewType::e3D && range.extent.layers > 1) { // Some games pass incorrect layer count for 3D textures so we need to fixup it range.extent.layers = 1; } + if (!is_storage) { mapping.r = ConvertComponentSwizzle(image.dst_sel_x); mapping.g = ConvertComponentSwizzle(image.dst_sel_y); From 0c7fa82c0250649060f5e65fed9b2b0284d802fc Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 
18 Sep 2024 22:08:33 +0200 Subject: [PATCH 10/16] added support for cubemap arrays --- .../backend/spirv/spirv_emit_context.cpp | 3 +- src/shader_recompiler/frontend/decode.cpp | 1 + .../frontend/translate/vector_memory.cpp | 2 ++ src/shader_recompiler/info.h | 5 ++-- .../ir/passes/resource_tracking_pass.cpp | 1 + src/shader_recompiler/ir/reg.h | 1 + .../renderer_vulkan/vk_instance.cpp | 1 + .../renderer_vulkan/vk_pipeline_common.cpp | 15 +++++----- .../renderer_vulkan/vk_platform.cpp | 1 - src/video_core/texture_cache/image.cpp | 6 ++-- src/video_core/texture_cache/image_view.cpp | 28 +++++++++++-------- src/video_core/texture_cache/image_view.h | 3 +- 12 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 7f70aee7c0..164c30c562 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -510,7 +510,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { case AmdGpu::ImageType::Color3D: return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); case AmdGpu::ImageType::Cube: - return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format); + return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled, + format); default: break; } diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index 26a2c1a6c9..6020f93bb2 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -1032,6 +1032,7 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) { m_instruction.control.mimg = *reinterpret_cast(&hexInstruction); m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode); + ASSERT(m_instruction.control.mimg.r128 == 0); } void 
GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) { diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 212d7fdc55..7ecc2e762f 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -546,6 +546,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { info.has_offset.Assign(flags.test(MimgModifier::Offset)); info.explicit_lod.Assign(explicit_lod); info.has_derivatives.Assign(has_derivatives); + info.is_array.Assign(mimg.da); // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { @@ -630,6 +631,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { info.has_offset.Assign(flags.test(MimgModifier::Offset)); // info.explicit_lod.Assign(explicit_lod); info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); + info.is_array.Assign(mimg.da); // Issue IR instruction, leaving unknown fields blank to patch later. 
const IR::Value texel = [&]() -> IR::Value { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index d8282bf494..2ae4420b2a 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -64,9 +64,10 @@ struct ImageResource { u32 dword_offset; AmdGpu::ImageType type; AmdGpu::NumberFormat nfmt; - bool is_storage; - bool is_depth; + bool is_storage{}; + bool is_depth{}; bool is_atomic{}; + bool is_array{}; constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index d8519a8ff3..f438f2693f 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -491,6 +491,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip .is_storage = is_storage, .is_depth = bool(inst_info.is_depth), .is_atomic = IsImageAtomicInstruction(inst), + .is_array = bool(inst_info.is_array), }); // Read sampler sharp. 
This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index fba04f33e7..4783d08e55 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -59,6 +59,7 @@ union TextureInstInfo { BitField<5, 1, u32> has_offset; BitField<6, 2, u32> gather_comp; BitField<8, 1, u32> has_derivatives; + BitField<9, 1, u32> is_array; }; union BufferInstInfo { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0bc73e14f8..8bec96cfb2 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -282,6 +282,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceFeatures2{ .features{ .robustBufferAccess = features.robustBufferAccess, + .imageCubeArray = features.imageCubeArray, .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, .logicOp = features.logicOp, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 50df7aecd7..fa7fbc0b85 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -3,6 +3,7 @@ #include +#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -21,7 +22,7 @@ Pipeline::~Pipeline() = default; void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, u32& binding, DescriptorWrites& set_writes) const { - using ImageBindingInfo = std::tuple; + using ImageBindingInfo = std::tuple; boost::container::static_vector image_bindings; for (const auto& image_desc : stage.images) { @@ -31,9 +32,9 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader const auto image_id 
= texture_cache.FindImage(image_info); auto& image = texture_cache.GetImage(image_id); image.flags |= VideoCore::ImageFlagBits::Bound; - image_bindings.emplace_back(image_id, tsharp, image_desc.is_storage); + image_bindings.emplace_back(image_id, tsharp, image_desc); } else { - image_bindings.emplace_back(VideoCore::ImageId{}, tsharp, image_desc.is_storage); + image_bindings.emplace_back(VideoCore::ImageId{}, tsharp, image_desc); } if (texture_cache.IsMeta(tsharp.Address())) { @@ -42,7 +43,7 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader } // Second pass to re-bind images that were updated after binding - for (auto [image_id, tsharp, is_storage] : image_bindings) { + for (auto [image_id, tsharp, desc] : image_bindings) { if (!image_id) { if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); @@ -56,7 +57,7 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader if (True(image.flags & VideoCore::ImageFlagBits::NeedsRebind)) { image_id = texture_cache.FindImage(image.info); } - VideoCore::ImageViewInfo view_info{tsharp, is_storage}; + VideoCore::ImageViewInfo view_info{tsharp, desc}; auto& image_view = texture_cache.FindTexture(image_id, view_info); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, texture_cache.GetImage(image_id).last_state.layout); @@ -69,8 +70,8 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader .dstBinding = binding++, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = - is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, + .descriptorType = desc.is_storage ? 
vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .pImageInfo = &image_infos.back(), }); } diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index feadda96c8..6abd00aaa5 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -44,7 +44,6 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( case 0xc81ad50e: case 0xb7c39078: case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE - case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error) return VK_FALSE; default: break; diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index bec11ea1d9..cd6119a7e1 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -200,11 +200,13 @@ boost::container::small_vector Image::GetBarriers( // resource transition for the next time. const auto mips = needs_partial_transition - ? std::ranges::views::iota(subres_range->base.level, subres_range->extent.levels) + ? std::ranges::views::iota(subres_range->base.level, + subres_range->base.level + subres_range->extent.levels) : std::views::iota(0u, info.resources.levels); const auto layers = needs_partial_transition - ? std::ranges::views::iota(subres_range->base.layer, subres_range->extent.layers) + ? 
std::ranges::views::iota(subres_range->base.layer, + subres_range->base.layer + subres_range->extent.layers) : std::views::iota(0u, info.resources.layers); for (u32 mip : mips) { diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 87ecd3b1e9..a9a1a6c639 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/logging/log.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -66,8 +67,8 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { return format; } -ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept - : is_storage{is_storage_} { +ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept + : is_storage{desc.is_storage} { const auto dfmt = image.GetDataFmt(); auto nfmt = image.GetNumberFmt(); if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) { @@ -76,19 +77,24 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexc format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt); range.base.level = image.base_level; range.base.layer = image.base_array; - range.extent.levels = image.last_level + 1; - range.extent.layers = image.last_array + 1; + range.extent.levels = image.last_level - image.base_level + 1; + range.extent.layers = image.last_array - image.base_array + 1; type = ConvertImageViewType(image.GetType()); // Adjust view type for partial cubemaps and arrays if (image.IsPartialCubemap()) { type = vk::ImageViewType::e2DArray; } - if (type == vk::ImageViewType::eCube && range.extent.layers > 6) { - type = vk::ImageViewType::eCubeArray; + if (type == vk::ImageViewType::eCube) { + if (desc.is_array) { + type = 
vk::ImageViewType::eCubeArray; + } else { + // Some games try to bind an array of cubemaps while shader reads only single one. + range.extent.layers = std::min(range.extent.layers, 6u); + } } if (type == vk::ImageViewType::e3D && range.extent.layers > 1) { - // Some games pass incorrect layer count for 3D textures so we need to fixup it + // Some games pass incorrect layer count for 3D textures so we need to fixup it. range.extent.layers = 1; } @@ -116,7 +122,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; - range.extent.layers = col_buffer.NumSlices(); + range.extent.layers = col_buffer.NumSlices() - range.base.layer; format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } @@ -128,7 +134,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, depth_buffer.stencil_info.format); is_storage = ctl.depth_write_enable; range.base.layer = view.slice_start; - range.extent.layers = view.NumSlices(); + range.extent.layers = view.NumSlices() - range.base.layer; } ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, @@ -160,9 +166,9 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .subresourceRange{ .aspectMask = aspect, .baseMipLevel = info.range.base.level, - .levelCount = info.range.extent.levels - info.range.base.level, + .levelCount = info.range.extent.levels, .baseArrayLayer = info.range.base.layer, - .layerCount = info.range.extent.layers - info.range.base.layer, + .layerCount = info.range.extent.layers, }, }; image_view = instance.GetDevice().createImageViewUnique(image_view_ci); diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 
7d53590dd8..ba8d2c72b2 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -3,6 +3,7 @@ #pragma once +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -17,7 +18,7 @@ namespace VideoCore { struct ImageViewInfo { ImageViewInfo() = default; - ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; + ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept; ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); From 2f689972e4e5b14b1c1ca0fe66dfe7a790cfbe25 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 19 Sep 2024 21:12:33 +0200 Subject: [PATCH 11/16] don't bind unused color buffers --- .../frontend/translate/export.cpp | 3 + src/shader_recompiler/info.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.h | 5 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 73 ++++++++++--------- .../renderer_vulkan/vk_rasterizer.cpp | 13 +++- .../renderer_vulkan/vk_rasterizer.h | 2 +- 6 files changed, 60 insertions(+), 37 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 18e830f7b4..7d901822d9 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -71,6 +71,9 @@ void Translator::EmitExport(const GcnInst& inst) { ir.SetAttribute(attrib, comp, swizzle(i)); } } + if (IR::IsMrt(attrib)) { + info.mrt_mask |= 1u << u8(attrib); + } } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 2ae4420b2a..ac623253b1 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -172,6 
+172,7 @@ struct Info { bool uses_fp64{}; bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions + u8 mrt_mask{0u}; explicit Info(Stage stage_, ShaderParams params) : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 77d6a0adbf..74817656a6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -33,6 +33,7 @@ struct GraphicsPipelineKey { Liverpool::DepthControl depth_stencil; u32 depth_bias_enable; u32 num_samples; + u32 mrt_mask; Liverpool::StencilControl stencil; Liverpool::PrimitiveType prim_type; u32 enable_primitive_restart; @@ -74,6 +75,10 @@ class GraphicsPipeline : public Pipeline { return key.write_masks; } + auto GetMrtMask() const { + return key.mrt_mask; + } + bool IsDepthEnabled() const { return key.depth_stencil.depth_enable.Value(); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7f6079a5c7..a9da36e2bc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -234,39 +234,6 @@ bool PipelineCache::RefreshGraphicsKey() { key.front_face = regs.polygon_control.front_face; key.num_samples = regs.aa_config.NumSamples(); - const auto skip_cb_binding = - regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; - - // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color - // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. 
- // We need to do some arrays compaction at this stage - key.color_formats.fill(vk::Format::eUndefined); - key.blend_controls.fill({}); - key.write_masks.fill({}); - key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); - int remapped_cb{}; - for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { - auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { - continue; - } - const auto base_format = - LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); - const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); - key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( - base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); - if (base_format == key.color_formats[remapped_cb]) { - key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); - } - key.blend_controls[remapped_cb] = regs.blend_control[cb]; - key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && - !col_buf.info.blend_bypass); - key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; - key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); - - ++remapped_cb; - } - u32 binding{}; for (u32 i = 0; i < MaxShaderStages; i++) { if (!regs.stage_enable.IsStageEnabled(i)) { @@ -309,6 +276,46 @@ bool PipelineCache::RefreshGraphicsKey() { std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } + + const auto* fs_info = infos[u32(Shader::Stage::Fragment)]; + key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; + + const auto skip_cb_binding = + regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; + + // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color + // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. 
+ // We need to do some arrays compaction at this stage + key.color_formats.fill(vk::Format::eUndefined); + key.blend_controls.fill({}); + key.write_masks.fill({}); + key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); + int remapped_cb{}; + for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { + continue; + } + if ((key.mrt_mask & (1u << cb)) == 0) { + continue; + } + const auto base_format = + LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); + const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); + key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( + base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); + if (base_format == key.color_formats[remapped_cb]) { + key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); + } + key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && + !col_buf.info.blend_bypass); + key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; + key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); + + ++remapped_cb; + } + return true; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9d8b425236..eac272726a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -62,7 +62,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { buffer_cache.BindVertexBuffers(vs_info); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); - BeginRendering(); + BeginRendering(*pipeline); UpdateDynamicState(*pipeline); const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(); @@ -102,7 +102,7 @@ void 
Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si buffer_cache.BindVertexBuffers(vs_info); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, 0); - BeginRendering(); + BeginRendering(*pipeline); UpdateDynamicState(*pipeline); const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true); @@ -179,7 +179,7 @@ void Rasterizer::Finish() { scheduler.Finish(); } -void Rasterizer::BeginRendering() { +void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) { const auto& regs = liverpool->regs; RenderState state; @@ -199,6 +199,13 @@ void Rasterizer::BeginRendering() { continue; } + // Skip stale color buffers if shader doesn't output to them. Otherwise it will perform + // an unnecessary transition and may result in state conflict if the resource is already + // bound for reading. + if ((pipeline.GetMrtMask() & (1 << col_buf_id)) == 0) { + continue; + } + const auto& hint = liverpool->last_cb_extent[col_buf_id]; VideoCore::ImageInfo image_info{col_buf, hint}; VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 5aa90c5ccc..bd05c8faf4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -52,7 +52,7 @@ class Rasterizer { void Finish(); private: - void BeginRendering(); + void BeginRendering(const GraphicsPipeline& pipeline); void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); From 183523a5c761c6a5be3b8974b0d9689a4ccd0bbe Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 19 Sep 2024 23:02:10 +0200 Subject: [PATCH 12/16] fixed depth promotion to do not use stencil --- src/video_core/renderer_vulkan/liverpool_to_vk.h | 10 ++++++++++ src/video_core/texture_cache/image_info.cpp | 8 +------- src/video_core/texture_cache/image_view.cpp | 3 +++ 3 files changed, 14 
insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 8432d21417..f5d10d48f7 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -4,6 +4,7 @@ #pragma once #include +#include "common/assert.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -55,4 +56,13 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags support void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); +static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { + if (fmt == vk::Format::eR32Sfloat) { + return vk::Format::eD32Sfloat; + } else if (fmt == vk::Format::eR16Unorm) { + return vk::Format::eD16Unorm; + } + UNREACHABLE(); +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 011e19db8f..8e4a2acf05 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -205,13 +205,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); // Override format if image is forced to be a depth target if (force_depth) { - if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) { - pixel_format = vk::Format::eD32SfloatS8Uint; - } else if (pixel_format == vk::Format::eR16Unorm) { - pixel_format = vk::Format::eD16UnormS8Uint; - } else { - UNREACHABLE(); - } + pixel_format = LiverpoolToVK::PromoteFormatToDepth(pixel_format); } type = ConvertImageType(image.GetType()); props.is_tiled = image.IsTiled(); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index a9a1a6c639..2aad1afb61 100644 --- 
a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -75,6 +75,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso nfmt = AmdGpu::NumberFormat::Unorm; } format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt); + if (desc.is_depth) { + format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format); + } range.base.level = image.base_level; range.base.layer = image.base_array; range.extent.levels = image.last_level - image.base_level + 1; From dbeb45737ecc38b944595468b699c5c992663229 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 20 Sep 2024 21:26:00 +0200 Subject: [PATCH 13/16] doors --- src/video_core/amdgpu/resource.h | 9 +++++++-- src/video_core/renderer_vulkan/vk_pipeline_common.cpp | 2 +- src/video_core/texture_cache/image.cpp | 2 +- src/video_core/texture_cache/image_info.cpp | 6 +++--- src/video_core/texture_cache/image_info.h | 3 ++- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 2b6fc829e4..fc572a04b9 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -238,10 +238,15 @@ struct Image { return pitch + 1; } - u32 NumLayers() const { + u32 NumLayers(bool is_array) const { u32 slices = GetType() == ImageType::Color3D ? 
1 : depth + 1; if (GetType() == ImageType::Cube) { - slices = std::max(last_array + 1, 6); + if (is_array) { + slices = last_array + 1; + ASSERT(slices % 6 == 0); + } else { + slices = 6; + } } if (pow2pad) { slices = std::bit_ceil(slices); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index fa7fbc0b85..7702960269 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -28,7 +28,7 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader for (const auto& image_desc : stage.images) { const auto tsharp = image_desc.GetSharp(stage); if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; + VideoCore::ImageInfo image_info{tsharp, image_desc}; const auto image_id = texture_cache.FindImage(image_info); auto& image = texture_cache.GetImage(image_id); image.flags |= VideoCore::ImageFlagBits::Bound; diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index cd6119a7e1..5c8bf54f48 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -125,7 +125,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - if (info.props.is_cube) { + if (info.props.is_cube || (info.type == vk::ImageType::e2D && info.resources.layers >= 6)) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; } else if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 8e4a2acf05..521e4118fa 100644 --- 
a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -200,11 +200,11 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice mips_layout.emplace_back(depth_slice_sz, pitch, 0); } -ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) noexcept { +ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept { tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); // Override format if image is forced to be a depth target - if (force_depth) { + if (desc.is_depth) { pixel_format = LiverpoolToVK::PromoteFormatToDepth(pixel_format); } type = ConvertImageType(image.GetType()); @@ -218,7 +218,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n size.depth = props.is_volume ? image.depth + 1 : 1; pitch = image.Pitch(); resources.levels = image.NumLevels(); - resources.layers = image.NumLayers(); + resources.layers = image.NumLayers(desc.is_array); num_bits = NumBits(image.GetDataFmt()); usage.texture = true; diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index ba8985b8ff..2ae2547f7e 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -5,6 +5,7 @@ #include "common/types.h" #include "core/libraries/videoout/buffer.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/texture_cache/types.h" @@ -19,7 +20,7 @@ struct ImageInfo { const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - ImageInfo(const AmdGpu::Image& image, bool force_depth = false) noexcept; + ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; bool 
IsTiled() const { return tiling_mode != AmdGpu::TilingMode::Display_Linear; From f9bb02d6122bfcbb8047edd5a4493d1c9de966d6 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 21 Sep 2024 16:08:48 +0200 Subject: [PATCH 14/16] bonfire lit --- .../renderer_vulkan/vk_pipeline_cache.cpp | 80 ++++++++++--------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a9da36e2bc..b6b0ca4bda 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -234,6 +234,49 @@ bool PipelineCache::RefreshGraphicsKey() { key.front_face = regs.polygon_control.front_face; key.num_samples = regs.aa_config.NumSamples(); + const bool skip_cb_binding = + regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; + + const auto& BindColorBuffers = [&](u32 mrt_mask) { + // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color + // attachments. This might be not a case as HW color buffers can be bound in an arbitrary + // order. 
We need to do some arrays compaction at this stage + key.color_formats.fill(vk::Format::eUndefined); + key.blend_controls.fill({}); + key.write_masks.fill({}); + key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); + + int remapped_cb{}; + for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { + continue; + } + if ((mrt_mask & (1u << cb)) == 0) { + continue; + } + const auto base_format = + LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); + const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); + key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( + base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); + if (base_format == key.color_formats[remapped_cb]) { + key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); + } + key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && + !col_buf.info.blend_bypass); + key.write_masks[remapped_cb] = + vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; + key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); + + ++remapped_cb; + } + }; + + // We need to run CB binding twice as actual MRT exports are unknown before FS is compiled. + BindColorBuffers(0xff); + u32 binding{}; for (u32 i = 0; i < MaxShaderStages; i++) { if (!regs.stage_enable.IsStageEnabled(i)) { @@ -279,42 +322,7 @@ bool PipelineCache::RefreshGraphicsKey() { const auto* fs_info = infos[u32(Shader::Stage::Fragment)]; key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; - - const auto skip_cb_binding = - regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; - - // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color - // attachments. 
This might be not a case as HW color buffers can be bound in an arbitrary order. - // We need to do some arrays compaction at this stage - key.color_formats.fill(vk::Format::eUndefined); - key.blend_controls.fill({}); - key.write_masks.fill({}); - key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); - int remapped_cb{}; - for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { - auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { - continue; - } - if ((key.mrt_mask & (1u << cb)) == 0) { - continue; - } - const auto base_format = - LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); - const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); - key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( - base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); - if (base_format == key.color_formats[remapped_cb]) { - key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); - } - key.blend_controls[remapped_cb] = regs.blend_control[cb]; - key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && - !col_buf.info.blend_bypass); - key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; - key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); - - ++remapped_cb; - } + BindColorBuffers(key.mrt_mask); return true; } From 611e5bc88930c331dcd6adda90300bd0598cc2a9 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 21 Sep 2024 19:16:47 +0200 Subject: [PATCH 15/16] cubemap array index calculation --- .../ir/passes/resource_tracking_pass.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f438f2693f..db0d75f0c2 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ 
b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -442,18 +442,29 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, - const IR::Value& z, bool is_storage) { + const IR::Value& z, bool is_storage, bool is_array) { // When cubemap is written with imageStore it is treated like 2DArray. if (is_storage) { return ir.CompositeConstruct(s, t, z); } + + ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below + // We need to fix x and y coordinate, // because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32. // We already force the scale value to be 1.0 when handling v_cubema_f32, // here we subtract 1.5 to recover the original value. const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f)); const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f)); - return ir.CompositeConstruct(x, y, z); + if (is_array) { + const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z}); + const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u)); + const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u)); + return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id), + ir.ConvertIToF(32, 32, false, slice_id)); + } else { + return ir.CompositeConstruct(x, y, z); + } } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { @@ -548,7 +559,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip case AmdGpu::ImageType::Color3D: // x, y, z return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; case AmdGpu::ImageType::Cube: // x, y, face - return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage), + return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage, + inst_info.is_array), body->Arg(3)}; default: UNREACHABLE_MSG("Unknown 
image type {}", image.GetType()); From 1aeffbe1014710bc558628a31222b1bca57318b2 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 21 Sep 2024 21:17:57 +0200 Subject: [PATCH 16/16] final touches --- .../renderer_vulkan/vk_pipeline_cache.cpp | 80 ++++++++++--------- src/video_core/texture_cache/image.cpp | 4 +- .../texture_cache/texture_cache.cpp | 9 ++- 3 files changed, 51 insertions(+), 42 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b6b0ca4bda..4fc1f46edf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -237,45 +237,32 @@ bool PipelineCache::RefreshGraphicsKey() { const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; - const auto& BindColorBuffers = [&](u32 mrt_mask) { - // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color - // attachments. This might be not a case as HW color buffers can be bound in an arbitrary - // order. 
We need to do some arrays compaction at this stage - key.color_formats.fill(vk::Format::eUndefined); - key.blend_controls.fill({}); - key.write_masks.fill({}); - key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); - - int remapped_cb{}; - for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { - auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { - continue; - } - if ((mrt_mask & (1u << cb)) == 0) { - continue; - } - const auto base_format = - LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); - const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); - key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( - base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); - if (base_format == key.color_formats[remapped_cb]) { - key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); - } - key.blend_controls[remapped_cb] = regs.blend_control[cb]; - key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && - !col_buf.info.blend_bypass); - key.write_masks[remapped_cb] = - vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; - key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); - - ++remapped_cb; + // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color + // attachments. This might not be the case as HW color buffers can be bound in an arbitrary + // order. We need to do some arrays compaction at this stage + key.color_formats.fill(vk::Format::eUndefined); + key.blend_controls.fill({}); + key.write_masks.fill({}); + key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); + + // First pass of bindings check to identify formats and swizzles and pass them to the shader + // recompiler.
+ for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { + continue; + } + const auto base_format = + LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); + const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); + key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( + base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); + if (base_format == key.color_formats[remapped_cb]) { + key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); } - }; - // We need to run CB binding twice as actual MRT exports are unknown before FS is compiled. - BindColorBuffers(0xff); + ++remapped_cb; + } u32 binding{}; for (u32 i = 0; i < MaxShaderStages; i++) { @@ -322,8 +309,25 @@ bool PipelineCache::RefreshGraphicsKey() { const auto* fs_info = infos[u32(Shader::Stage::Fragment)]; key.mrt_mask = fs_info ? 
fs_info->mrt_mask : 0u; - BindColorBuffers(key.mrt_mask); + // Second pass to fill the remaining CB pipeline key data + for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) || + (key.mrt_mask & (1u << cb)) == 0) { + key.color_formats[cb] = vk::Format::eUndefined; + key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard; + continue; + } + + key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && + !col_buf.info.blend_bypass); + key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; + key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); + + ++remapped_cb; + } return true; } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 5c8bf54f48..4ce6e1eea6 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -213,7 +213,9 @@ boost::container::small_vector Image::GetBarriers( for (u32 layer : layers) { // NOTE: these loops may produce a lot of small barriers. // If this becomes a problem, we can optimize it by merging adjacent barriers.
- auto& state = subresource_states[mip * info.resources.layers + layer]; + const auto subres_idx = mip * info.resources.layers + layer; + ASSERT(subres_idx < subresource_states.size()); + auto& state = subresource_states[subres_idx]; if (state.layout != dst_layout || state.access_mask != dst_mask) { barriers.emplace_back(vk::ImageMemoryBarrier2{ diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index ef402ecc40..dfa1bab648 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -87,8 +87,7 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image auto new_image_id = slot_images.insert(instance, scheduler, requested_info); RegisterImage(new_image_id); - // auto& new_image = slot_images[new_image_id]; - // TODO: need to run a helper for depth copy here + // TODO: perform a depth copy here FreeImage(cache_image_id); return new_image_id; @@ -98,7 +97,11 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image !requested_info.usage.depth_target && (requested_info.usage.texture || requested_info.usage.storage); if (cache_info.usage.depth_target && should_bind_as_texture) { - return cache_image_id; + if (cache_info.resources == requested_info.resources) { + return cache_image_id; + } else { + UNREACHABLE(); + } } return {};