From 87e9d2eba505ded52b6575c9eead6e758a2b5cc4 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 2 Apr 2024 21:26:50 +0100 Subject: [PATCH] GS/HW: Rearrange color on shuffle if SW Blend or TFX --- bin/resources/shaders/dx11/tfx.fx | 23 +++++++++++++++++++++-- bin/resources/shaders/opengl/tfx_fs.glsl | 19 +++++++++++++++++-- bin/resources/shaders/vulkan/tfx.glsl | 19 +++++++++++++++++-- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 4 ++-- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 1 - pcsx2/GS/Renderers/Metal/tfx.metal | 23 +++++++++++++++++++++-- pcsx2/ShaderCacheVersion.h | 2 +- 7 files changed, 79 insertions(+), 12 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 3491ec402f62d9..7bd82941d6154a 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -766,7 +766,7 @@ float4 ps_color(PS_INPUT input) float4 T = sample_color(st, input.t.w); #endif - if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) + if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_before = uint4(T); if (PS_PROCESS_BA & SHUFFLE_READ) @@ -866,6 +866,25 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f; + if (PS_SHUFFLE && SW_BLEND_NEEDS_RT) + { + uint4 denorm_rt = uint4(RT); + if (PS_PROCESS_BA & SHUFFLE_WRITE) + { + RT.r = float((denorm_rt.b << 3) & 0xF8); + RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0)); + RT.b = float((denorm_rt.a << 1) & 0xF8); + RT.a = float(denorm_rt.a & 0x80); + } + else + { + RT.r = float((denorm_rt.r << 3) & 0xF8); + RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0)); + RT.b = float((denorm_rt.g << 1) & 0xF8); + RT.a = float(denorm_rt.g & 0x80); + } + } + float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f; float3 Cd = trunc(RT.rgb * 255.0f + 0.1f); float3 Cs = Color.rgb; @@ -1037,7 +1056,7 @@ PS_OUTPUT ps_main(PS_INPUT input) if (PS_SHUFFLE) { - if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) + if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_after = uint4(C); if (PS_PROCESS_BA & SHUFFLE_READ) diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 0399fec82bf343..41e054e431b3fd 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -686,7 +686,7 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) + #if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8); @@ -807,6 +807,21 @@ float As = As_rgba.a; float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f; #endif + #if PS_SHUFFLE && SW_BLEND_NEEDS_RT + uvec4 denorm_rt = uvec4(RT); + #if (PS_PROCESS_BA & SHUFFLE_WRITE) + RT.r = float((denorm_rt.b << 3) & 0xF8); + RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0)); + RT.b = float((denorm_rt.a << 1) & 0xF8); + RT.a = float(denorm_rt.a & 0x80); + #else + RT.r = float((denorm_rt.r << 3) & 0xF8); + RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0)); + RT.b = float((denorm_rt.g << 1) & 0xF8); + RT.a = float(denorm_rt.g & 0x80); + #endif + #endif + // Let the compiler do its jobs ! vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f); vec3 Cs = Color.rgb; @@ -1024,7 +1039,7 @@ void ps_main() #if PS_SHUFFLE - #if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) + #if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); #if (PS_PROCESS_BA & SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index f6ba9192717e52..30a7038ef20025 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -953,7 +953,7 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) + #if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8); @@ -1073,6 +1073,21 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba) #else float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f; #endif + + #if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED + uvec4 denorm_rt = uvec4(RT); + #if (PS_PROCESS_BA & SHUFFLE_WRITE) + RT.r = float((denorm_rt.b << 3) & 0xF8); + RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0)); + RT.b = float((denorm_rt.a << 1) & 0xF8); + RT.a = float(denorm_rt.a & 0x80); + #else + RT.r = float((denorm_rt.r << 3) & 0xF8); + RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0)); + RT.b = float((denorm_rt.g << 1) & 0xF8); + RT.a = float(denorm_rt.g & 0x80); + #endif + #endif // Let the compiler do its jobs ! vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f); @@ -1289,7 +1304,7 @@ void main() ps_blend(C, alpha_blend); #if PS_SHUFFLE - #if SW_BLEND && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) + #if (SW_BLEND || PS_TFX != 1) && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); #if (PS_PROCESS_BA & SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 1c49ae5d8e5ece..907528e14371fb 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -4010,7 +4010,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT const bool blend_mix1 = !!(blend_flag & BLEND_MIX1); const bool blend_mix2 = !!(blend_flag & BLEND_MIX2); const bool blend_mix3 = !!(blend_flag & BLEND_MIX3); - bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) && COLCLAMP.CLAMP; + bool blend_mix = (blend_mix1 || blend_mix2 || blend_mix3) && COLCLAMP.CLAMP && !m_texture_shuffle; const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked; // Primitives don't overlap. @@ -4032,7 +4032,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT // Warning no break on purpose // Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks. - bool sw_blending = false; + bool sw_blending = m_texture_shuffle; if (features.texture_barrier) { const bool blend_requires_barrier = (blend_flag & BLEND_A_MAX) // Impossible blending diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 0fafbe22044efd..a154c2b9863ae1 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -948,7 +948,6 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c // FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ??? if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { - pxAssert(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); dst = t; inside_target = false; break; diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 1f1d7d6c47f79d..21904ca7b0915f 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -831,7 +831,7 @@ struct PSMain else T = sample_color(st); - if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) + if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_before = uint4(T); if (PS_PROCESS_BA & SHUFFLE_READ) @@ -936,6 +936,25 @@ struct PSMain float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 128.1f) / 128.f : trunc(current_color.a * 255.1f) / 128.f; + if (PS_SHUFFLE && NEEDS_RT) + { + uint4 denorm_rt = uint4(RT); + if (PS_PROCESS_BA & SHUFFLE_WRITE) + { + RT.r = float((denorm_rt.b << 3) & 0xF8); + RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0)); + RT.b = float((denorm_rt.a << 1) & 0xF8); + RT.a = float(denorm_rt.a & 0x80); + } + else + { + RT.r = float((denorm_rt.r << 3) & 0xF8); + RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0)); + RT.b = float((denorm_rt.g << 1) & 0xF8); + RT.a = float(denorm_rt.g & 0x80); + } + } + float3 Cd = trunc(current_color.rgb * 255.5f); float3 Cs = Color.rgb; @@ -1105,7 +1124,7 @@ struct PSMain if (PS_SHUFFLE) { - if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) + if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_after = uint4(C); if (PS_PROCESS_BA & SHUFFLE_READ) diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index f73a8033768612..3893e28d847f0b 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 44; +static constexpr u32 SHADER_CACHE_VERSION = 45;