Skip to content

Commit

Permalink
GS/HW: Rearrange color on shuffle if SW Blend or TFX
Browse files Browse the repository at this point in the history
  • Loading branch information
refractionpcsx2 committed Apr 8, 2024
1 parent b1f4f67 commit 30f4e77
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 10 deletions.
23 changes: 21 additions & 2 deletions bin/resources/shaders/dx11/tfx.fx
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ float4 ps_color(PS_INPUT input)
float4 T = sample_color(st, input.t.w);
#endif

if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_before = uint4(T);
if (PS_PROCESS_BA & SHUFFLE_READ)
Expand Down Expand Up @@ -866,6 +866,25 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)

float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f;

if (PS_SHUFFLE && SW_BLEND_NEEDS_RT)
{
uint4 denorm_rt = uint4(RT);
if (PS_PROCESS_BA & SHUFFLE_WRITE)
{
RT.r = float((denorm_rt.b << 3) & 0xF8);
RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
RT.b = float((denorm_rt.a << 1) & 0xF8);
RT.a = float(denorm_rt.a & 0x80);
}
else
{
RT.r = float((denorm_rt.r << 3) & 0xF8);
RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
RT.b = float((denorm_rt.g << 1) & 0xF8);
RT.a = float(denorm_rt.g & 0x80);
}
}

float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f;
float3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
float3 Cs = Color.rgb;
Expand Down Expand Up @@ -1037,7 +1056,7 @@ PS_OUTPUT ps_main(PS_INPUT input)

if (PS_SHUFFLE)
{
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_after = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ)
Expand Down
19 changes: 17 additions & 2 deletions bin/resources/shaders/opengl/tfx_fs.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif

#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8);
Expand Down Expand Up @@ -807,6 +807,21 @@ float As = As_rgba.a;
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif

#if PS_SHUFFLE && SW_BLEND_NEEDS_RT
uvec4 denorm_rt = uvec4(RT);
#if (PS_PROCESS_BA & SHUFFLE_WRITE)
RT.r = float((denorm_rt.b << 3) & 0xF8);
RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
RT.b = float((denorm_rt.a << 1) & 0xF8);
RT.a = float(denorm_rt.a & 0x80);
#else
RT.r = float((denorm_rt.r << 3) & 0xF8);
RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
RT.b = float((denorm_rt.g << 1) & 0xF8);
RT.a = float(denorm_rt.g & 0x80);
#endif
#endif

// Let the compiler do its jobs !
vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
vec3 Cs = Color.rgb;
Expand Down Expand Up @@ -1024,7 +1039,7 @@ void ps_main()


#if PS_SHUFFLE
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
Expand Down
19 changes: 17 additions & 2 deletions bin/resources/shaders/vulkan/tfx.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif

#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8);
Expand Down Expand Up @@ -1073,6 +1073,21 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
#else
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif

#if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED
uvec4 denorm_rt = uvec4(RT);
#if (PS_PROCESS_BA & SHUFFLE_WRITE)
RT.r = float((denorm_rt.b << 3) & 0xF8);
RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
RT.b = float((denorm_rt.a << 1) & 0xF8);
RT.a = float(denorm_rt.a & 0x80);
#else
RT.r = float((denorm_rt.r << 3) & 0xF8);
RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
RT.b = float((denorm_rt.g << 1) & 0xF8);
RT.a = float(denorm_rt.g & 0x80);
#endif
#endif

// Let the compiler do its jobs !
vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
Expand Down Expand Up @@ -1289,7 +1304,7 @@ void main()
ps_blend(C, alpha_blend);

#if PS_SHUFFLE
#if SW_BLEND && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if (SW_BLEND || PS_TFX != 1) && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
Expand Down
2 changes: 1 addition & 1 deletion pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4040,7 +4040,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
// Condition 1: Require full sw blend for full barrier.
// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
// Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend.
const bool prefer_sw_blend = (features.texture_barrier && m_conf.require_full_barrier) || (m_conf.require_one_barrier && (no_prim_overlap || m_conf.ps.shuffle));
const bool prefer_sw_blend = (features.texture_barrier && m_conf.require_full_barrier) || (m_conf.require_one_barrier && no_prim_overlap) || m_conf.ps.shuffle;
const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend; // Mix of hw/sw blending

Expand Down
1 change: 0 additions & 1 deletion pcsx2/GS/Renderers/HW/GSTextureCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,6 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c
// FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ???
if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
pxAssert(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth);
dst = t;
inside_target = false;
break;
Expand Down
23 changes: 21 additions & 2 deletions pcsx2/GS/Renderers/Metal/tfx.metal
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ struct PSMain
else
T = sample_color(st);

if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_before = uint4(T);
if (PS_PROCESS_BA & SHUFFLE_READ)
Expand Down Expand Up @@ -936,6 +936,25 @@ struct PSMain

float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 128.1f) / 128.f : trunc(current_color.a * 255.1f) / 128.f;

if (PS_SHUFFLE && NEEDS_RT)
{
uint4 denorm_rt = uint4(current_color);
if (PS_PROCESS_BA & SHUFFLE_WRITE)
{
current_color.r = float((denorm_rt.b << 3) & 0xF8);
current_color.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
current_color.b = float((denorm_rt.a << 1) & 0xF8);
current_color.a = float(denorm_rt.a & 0x80);
}
else
{
current_color.r = float((denorm_rt.r << 3) & 0xF8);
current_color.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
current_color.b = float((denorm_rt.g << 1) & 0xF8);
current_color.a = float(denorm_rt.g & 0x80);
}
}

float3 Cd = trunc(current_color.rgb * 255.5f);
float3 Cs = Color.rgb;

Expand Down Expand Up @@ -1105,7 +1124,7 @@ struct PSMain

if (PS_SHUFFLE)
{
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_after = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ)
Expand Down

0 comments on commit 30f4e77

Please sign in to comment.