From 1a415184e9390636572bedf86ed70acf6c36208f Mon Sep 17 00:00:00 2001 From: lightningterror <18107717+lightningterror@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:47:45 +0200 Subject: [PATCH] GS/HW: Extend blend second pass to more blend formulas. `Cs + Cd*Ad, Cs - Cd*Ad, Cd*(1 - Ad), Cs*(1 + Ad), Cs*(1 - Ad).` --- bin/resources/shaders/dx11/tfx.fx | 5 +++ bin/resources/shaders/opengl/tfx_fs.glsl | 3 ++ bin/resources/shaders/vulkan/tfx.glsl | 3 ++ pcsx2/GS/Renderers/Common/GSDevice.cpp | 10 ++--- pcsx2/GS/Renderers/Common/GSDevice.h | 18 +++++--- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 56 +++++++++++++++++++++--- pcsx2/ShaderCacheVersion.h | 2 +- 7 files changed, 77 insertions(+), 20 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 3aa6f150449be..a303be7c91fca 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -972,6 +972,11 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) float color_compensate = 255.0f / max(128.0f, max_color); Color.rgb *= (float3)color_compensate; } + else if (PS_BLEND_HW == 4) + { + // Needed for Cd * (1 - Ad) + Color.rgb = (float3)128.0f; + } } } diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 41e054e431b3f..f9d1449e43f4d 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -933,6 +933,9 @@ float As = As_rgba.a; float max_color = max(max(Color.r, Color.g), Color.b); float color_compensate = 255.0f / max(128.0f, max_color); Color.rgb *= vec3(color_compensate); +#elif PS_BLEND_HW == 4 + // Needed for Cd * (1 - Ad) + Color.rgb = vec3(128.0f); #endif #endif diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 30a7038ef2002..ede0d6a239043 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1200,6 +1200,9 @@ void ps_blend(inout 
vec4 Color, inout vec4 As_rgba) float max_color = max(max(Color.r, Color.g), Color.b); float color_compensate = 255.0f / max(128.0f, max_color); Color.rgb *= vec3(color_compensate); + #elif PS_BLEND_HW == 4 + // Needed for Cd * (1 - Ad) + Color.rgb = vec3(128.0f); #endif #endif } diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index eae1ae9eaa4e0..36a402400cf51 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -889,7 +889,7 @@ const std::array GSDevice::m_blendMap = { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) { BLEND_ACCU , OP_ADD , SRC1_COLOR , CONST_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd { BLEND_NO_REC , OP_ADD , SRC1_COLOR , CONST_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As - { BLEND_A_MAX , OP_ADD , CONST_ONE , CONST_ZERO} , // 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) + { BLEND_A_MAX | BLEND_HW8 , OP_ADD , CONST_ONE , CONST_ZERO} , // 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) { BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd { BLEND_HW3 , OP_ADD , DST_ALPHA , CONST_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad { BLEND_NO_REC , OP_ADD , CONST_ONE , CONST_ZERO} , // 0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) @@ -916,7 +916,7 @@ const std::array GSDevice::m_blendMap = { BLEND_HW4 , OP_ADD , CONST_ONE , SRC1_COLOR} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As { BLEND_HW1 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) { BLEND_HW2 , OP_ADD , DST_COLOR , SRC1_COLOR} , // 1202: (Cd - 0)*As + 0 ==> Cd*As - { 0 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad + { BLEND_HW6 , OP_ADD , CONST_ONE , DST_ALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad { BLEND_HW1 , OP_ADD , DST_COLOR , DST_ALPHA} , // 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) { BLEND_HW5 , OP_ADD , CONST_ZERO , DST_ALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad { BLEND_HW4 , OP_ADD 
, CONST_ONE , CONST_COLOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F @@ -925,7 +925,7 @@ const std::array GSDevice::m_blendMap = { BLEND_NO_REC , OP_ADD , INV_SRC1_COLOR , CONST_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) { BLEND_ACCU , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As { BLEND_NO_REC , OP_REV_SUBTRACT , SRC1_COLOR , CONST_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As - { 0 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) + { BLEND_HW9 , OP_ADD , INV_DST_ALPHA , CONST_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) { BLEND_HW3 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad { 0 , OP_REV_SUBTRACT , DST_ALPHA , CONST_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad { BLEND_NO_REC , OP_ADD , INV_CONST_COLOR , CONST_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) @@ -934,8 +934,8 @@ const std::array GSDevice::m_blendMap = { BLEND_HW4 , OP_SUBTRACT , CONST_ONE , SRC1_COLOR} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As { 0 , OP_ADD , CONST_ZERO , INV_SRC1_COLOR} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) { 0 , OP_SUBTRACT , CONST_ZERO , SRC1_COLOR} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As - { 0 , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad - { 0 , OP_ADD , CONST_ZERO , INV_DST_ALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) + { BLEND_HW6 , OP_SUBTRACT , CONST_ONE , DST_ALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad + { BLEND_HW7 , OP_ADD , CONST_ZERO , INV_DST_ALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) { 0 , OP_SUBTRACT , CONST_ZERO , DST_ALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad { BLEND_HW4 , OP_SUBTRACT , CONST_ONE , CONST_COLOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F { 0 , OP_ADD , CONST_ZERO , INV_CONST_COLOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 72fd685f47301..ae0d0904cfeda 100644 --- 
a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -227,12 +227,16 @@ enum HWBlendFlags BLEND_HW3 = 0x8, // Multiply Cs by (255/128) to compensate for wrong Ad/255 value, should be Ad/128 BLEND_HW4 = 0x10, // HW rendering is split in 2 passes BLEND_HW5 = 0x20, // HW rendering is split in 2 passes - BLEND_MIX1 = 0x40, // Mix of hw and sw, do Cs*F or Cs*As in shader - BLEND_MIX2 = 0x80, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader - BLEND_MIX3 = 0x100, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader - BLEND_ACCU = 0x200, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds - BLEND_NO_REC = 0x400, // Doesn't require sampling of the RT as a texture - BLEND_A_MAX = 0x800, // Impossible blending uses coeff bigger than 1 + BLEND_HW6 = 0x40, // HW rendering is split in 2 passes + BLEND_HW7 = 0x80, // HW rendering is split in 2 passes + BLEND_HW8 = 0x100, // HW rendering is split in 2 passes + BLEND_HW9 = 0x200, // HW rendering is split in 2 passes + BLEND_MIX1 = 0x400, // Mix of hw and sw, do Cs*F or Cs*As in shader + BLEND_MIX2 = 0x800, // Mix of hw and sw, do Cs*(As + 1) or Cs*(F + 1) in shader + BLEND_MIX3 = 0x1000, // Mix of hw and sw, do Cs*(1 - As) or Cs*(1 - F) in shader + BLEND_ACCU = 0x2000, // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds + BLEND_NO_REC = 0x4000, // Doesn't require sampling of the RT as a texture + BLEND_A_MAX = 0x8000, // Impossible blending uses coeff bigger than 1 }; // Determines the HW blend function for DX11/OGL @@ -332,7 +336,7 @@ struct alignas(16) GSHWDrawConfig u32 blend_c : 2; u32 blend_d : 2; u32 fixed_one_a : 1; - u32 blend_hw : 2; + u32 blend_hw : 3; u32 a_masked : 1; u32 hdr : 1; u32 rta_correction : 1; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 0647966f1aa54..58845f3c8168e 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ 
b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -4050,7 +4050,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT // HW blend can be done in multiple passes when there's no overlap. // Blend second pass is only useful when texture barriers aren't supported. // Speed wise Texture barriers > blend second pass > texture copies. - // TODO: 24bit and 32bit formats on clamp 1 can always prefer blend second pass depending on the blend equations. const bool blend_second_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend; const bool bmix1_second_pass = blend_second_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2; // We don't want to enable blend mix if we are doing a second pass, it's useless. @@ -4464,9 +4463,52 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.blend_second_pass.blend_hw = 1; m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; } - - if (m_conf.ps.blend_c == 2 && m_conf.blend_second_pass.enable) - m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f; + else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW6)) + { + // Alpha = Ad. + // Cs + Cd*Alpha, Cs - Cd*Alpha. + // Render pass 1: Multiply Cs by 0.5, then do hw blend. + m_conf.ps.blend_c = 2; + AFIX = 64; + blend.src = GSDevice::CONST_COLOR; + // Render pass 2: Take result (Cd) from render pass 1 and double it. + m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend_hw = 1; + m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; + } + else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW7)) + { + // Alpha = Ad. + // Cd*(1 - Alpha). + // Render pass 1: Multiply Cd by 0.5, then do Cd - Cd*Alpha. 
+ m_conf.ps.blend_hw = 4; + blend.src = GSDevice::DST_COLOR; + blend.dst = GSDevice::DST_ALPHA; + blend.op = GSDevice::OP_SUBTRACT; + // Render pass 2: Take result (Cd) from render pass 1 and double it. + m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend_hw = 1; + m_conf.blend_second_pass.blend = {true, GSDevice::DST_COLOR, GSDevice::CONST_ONE, GSDevice::OP_ADD, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; + } + else if (blend_flag & BLEND_HW8) + { + // Alpha = Ad. + // Cs*(1 + Alpha). + // Render pass 1: Do Cs. + // Render pass 2: Try to double Cs, then take result (Cd) from render pass 1 and add Cs*Alpha to it. + m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend_hw = 3; + m_conf.blend_second_pass.blend = {true, GSDevice::DST_ALPHA, GSDevice::CONST_ONE, blend_second_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; + } + else if (alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW9)) + { + // Alpha = Ad. + // Cs*(1 - Alpha). + // Render pass 1: Do Cs*(1 - Alpha). + // Render pass 2: Take result (Cd) from render pass 1 and subtract Cs*Alpha from it. 
+ m_conf.blend_second_pass.enable = true; + m_conf.blend_second_pass.blend = {true, GSDevice::DST_ALPHA, GSDevice::CONST_ONE, GSDevice::OP_REV_SUBTRACT, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; + } } if (blend_flag & BLEND_HW1) @@ -4475,9 +4517,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT } else if (blend_flag & BLEND_HW2) { - if (m_conf.ps.blend_c == 2) - m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f; - m_conf.ps.blend_hw = 2; } else if (!m_conf.blend_second_pass.enable && alpha_c1_high_no_rta_correct && (blend_flag & BLEND_HW3)) @@ -4485,6 +4524,9 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.blend_hw = 3; } + if (m_conf.ps.blend_c == 2 && (m_conf.ps.blend_hw == 2 || m_conf.blend_second_pass.blend_hw == 2)) + m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(AFIX) / 128.0f; + const GSDevice::BlendFactor src_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ZERO : GSDevice::CONST_ONE; const GSDevice::BlendFactor dst_factor_alpha = m_conf.blend_second_pass.enable ? GSDevice::CONST_ONE : GSDevice::CONST_ZERO; m_conf.blend = {true, blend.src, blend.dst, blend.op, src_factor_alpha, dst_factor_alpha, m_conf.ps.blend_c == 2, AFIX}; diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 12a331b52db98..f8d466ead6f7c 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 46; +static constexpr u32 SHADER_CACHE_VERSION = 47;