Skip to content

Commit

Permalink
GS/HW: Allow source is rt to use corrected alpha in most cases
Browse files Browse the repository at this point in the history
  • Loading branch information
refractionpcsx2 committed Mar 12, 2024
1 parent 1a355a0 commit b77947c
Show file tree
Hide file tree
Showing 18 changed files with 124 additions and 28 deletions.
4 changes: 2 additions & 2 deletions bin/resources/shaders/dx11/convert.fx
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,15 @@ PS_OUTPUT ps_rta_correction(PS_INPUT input)
{
// Fetches a colour with sample_c(input.t) and outputs it with RGB unchanged and alpha rescaled.
PS_OUTPUT output;
float4 value = sample_c(input.t);
// NOTE(review): this first write to output.c is overwritten by the statement directly after it,
// so it has no effect on the result; it appears to be the pre-change line of this diff hunk - confirm.
output.c = float4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f);
// Effective output: alpha divided by (128.5 / 255), i.e. multiplied by 255 / 128.5.
output.c = float4(value.rgb, value.a / (128.5f / 255.0f));
return output;
}

// Fetches a colour with sample_c(input.t) and outputs it with RGB unchanged and alpha
// multiplied by (128.5 / 255) - the same factor that ps_rta_correction divides by.
PS_OUTPUT ps_rta_decorrection(PS_INPUT input)
{
PS_OUTPUT output;
float4 value = sample_c(input.t);
// NOTE(review): this first write to output.c is overwritten by the statement directly after it,
// so it has no effect on the result; it appears to be the pre-change line of this diff hunk - confirm.
output.c = float4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f);
// Effective output: alpha scaled by 128.5 / 255.
output.c = float4(value.rgb, value.a * (128.5f / 255.0f));
return output;
}

Expand Down
19 changes: 16 additions & 3 deletions bin/resources/shaders/dx11/tfx.fx
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#define PS_URBAN_CHAOS_HLE 0
#define PS_HDR 0
#define PS_RTA_CORRECTION 0
#define PS_RTA_SRC_CORRECTION 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
#define PS_BLEND_B 0
Expand Down Expand Up @@ -330,7 +331,16 @@ uint4 sample_4_index(float4 uv, float uv_w)
c.w = sample_c(uv.zw, uv_w).a;

// Denormalize value
uint4 i = uint4(c * 255.5f);
uint4 i;

if (PS_RTA_SRC_CORRECTION)
{
i = uint4(c * 128.25f); // Denormalize value
}
else
{
i = uint4(c * 255.5f); // Denormalize value
}

if (PS_PAL_FMT == 1)
{
Expand Down Expand Up @@ -650,6 +660,9 @@ float4 sample_color(float2 st, float uv_w)
t = c[0];
}

if (PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION)
t.a = t.a * (128.5f / 255.0f);

return trunc(t * 255.0f + 0.05f);
}

Expand Down Expand Up @@ -850,7 +863,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)

float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f;

float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 127.5f + 0.05f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f;
float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f;
float3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
float3 Cs = Color.rgb;

Expand Down Expand Up @@ -969,7 +982,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
float4 alpha_blend = (float4)0.0f;
if (SW_AD_TO_HW)
{
float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 127.5f + 0.05f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 128.0f + 0.1f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
alpha_blend = (float4)(RT.a / 128.0f);
}
else
Expand Down
4 changes: 2 additions & 2 deletions bin/resources/shaders/opengl/convert.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -337,15 +337,15 @@ void ps_datm0_rta_correction()
// Fetches a colour with sample_c() and writes it to SV_Target0 with RGB unchanged and
// alpha divided by (128.5 / 255).
void ps_rta_correction()
{
vec4 value = sample_c();
// NOTE(review): this first write to SV_Target0 is overwritten by the statement directly after it,
// so it has no effect on the result; it appears to be the pre-change line of this diff hunk - confirm.
SV_Target0 = vec4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f);
// Effective output: alpha multiplied by 255 / 128.5.
SV_Target0 = vec4(value.rgb, value.a / (128.5f / 255.0f));
}
#endif

#ifdef ps_rta_decorrection
// Fetches a colour with sample_c() and writes it to SV_Target0 with RGB unchanged and
// alpha multiplied by (128.5 / 255) - the same factor that ps_rta_correction divides by.
void ps_rta_decorrection()
{
vec4 value = sample_c();
// NOTE(review): this first write to SV_Target0 is overwritten by the statement directly after it,
// so it has no effect on the result; it appears to be the pre-change line of this diff hunk - confirm.
SV_Target0 = vec4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f);
// Effective output: alpha scaled by 128.5 / 255.
SV_Target0 = vec4(value.rgb, value.a * (128.5f / 255.0f));
}
#endif

Expand Down
14 changes: 11 additions & 3 deletions bin/resources/shaders/opengl/tfx_fs.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,12 @@ uvec4 sample_4_index(vec4 uv)
c.y = sample_c(uv.zy).a;
c.z = sample_c(uv.xw).a;
c.w = sample_c(uv.zw).a;


#if PS_RTA_SRC_CORRECTION
uvec4 i = uvec4(c * 128.25f); // Denormalize value
#else
uvec4 i = uvec4(c * 255.5f); // Denormalize value
#endif

#if PS_PAL_FMT == 1
// 4HL
Expand Down Expand Up @@ -591,6 +595,10 @@ vec4 sample_color(vec2 st)
t = c[0];
#endif

#if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION
t.a = t.a * (128.5f / 255.0f);
#endif

// The 0.05f helps to fix the overbloom of sotc
// I think the issue is related to the rounding of texture coordinates. The linear (from fixed unit)
// interpolation could be slightly below the correct one.
Expand Down Expand Up @@ -803,7 +811,7 @@ float As = As_rgba.a;
#endif

#if PS_RTA_CORRECTION
float Ad = trunc(RT.a * 127.5f + 0.05f) / 128.0f;
float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f;
#else
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif
Expand Down Expand Up @@ -985,7 +993,7 @@ void ps_main()

#if SW_AD_TO_HW
#if PS_RTA_CORRECTION
vec4 RT = trunc(fetch_rt() * 127.5f + 0.05f);
vec4 RT = trunc(fetch_rt() * 128.0f + 0.1f);
#else
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
#endif
Expand Down
4 changes: 2 additions & 2 deletions bin/resources/shaders/vulkan/convert.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,15 @@ void ps_datm0_rta_correction()
// Fetches a colour with sample_c(v_tex) and writes it to o_col0 with RGB unchanged and
// alpha divided by (128.5 / 255).
void ps_rta_correction()
{
vec4 value = sample_c(v_tex);
// NOTE(review): this first write to o_col0 is overwritten by the statement directly after it,
// so it has no effect on the result; it appears to be the pre-change line of this diff hunk - confirm.
o_col0 = vec4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f);
// Effective output: alpha multiplied by 255 / 128.5.
o_col0 = vec4(value.rgb, value.a / (128.5f / 255.0f));
}
#endif

#ifdef ps_rta_decorrection
// Fetches a colour with sample_c(v_tex) and writes it to o_col0 with RGB unchanged and
// alpha multiplied by (128.5 / 255) - the same factor that ps_rta_correction divides by.
void ps_rta_decorrection()
{
vec4 value = sample_c(v_tex);
// NOTE(review): this first write to o_col0 is overwritten by the statement directly after it,
// so it has no effect on the result; it appears to be the pre-change line of this diff hunk - confirm.
o_col0 = vec4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f);
// Effective output: alpha scaled by 128.5 / 255.
o_col0 = vec4(value.rgb, value.a * (128.5f / 255.0f));
}
#endif

Expand Down
13 changes: 10 additions & 3 deletions bin/resources/shaders/vulkan/tfx.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,12 @@ uvec4 sample_4_index(vec4 uv)
c.w = sample_c(uv.zw).a;

// Denormalize value

#if PS_RTA_SRC_CORRECTION
uvec4 i = uvec4(c * 128.25f);
#else
uvec4 i = uvec4(c * 255.5f);
#endif

#if PS_PAL_FMT == 1
// 4HL
Expand Down Expand Up @@ -835,7 +840,9 @@ vec4 sample_color(vec2 st)
t = c[0];
}
#endif

#if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION
t.a = t.a * (128.5f / 255.0f);
#endif
return trunc(t * 255.0f + 0.05f);
}

Expand Down Expand Up @@ -1056,7 +1063,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
#endif

#if PS_RTA_CORRECTION
float Ad = trunc(RT.a * 127.5f + 0.05f) / 128.0f;
float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f;
#else
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif
Expand Down Expand Up @@ -1236,7 +1243,7 @@ void main()

#if SW_AD_TO_HW
#if PS_RTA_CORRECTION
vec4 RT = trunc(sample_from_rt() * 127.5f + 0.05f);
vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f);
#else
vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f);
#endif
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/Common/GSDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ struct alignas(16) GSHWDrawConfig
u32 a_masked : 1;
u32 hdr : 1;
u32 rta_correction : 1;
u32 rta_source_correction : 1;
u32 colclip : 1;
u32 blend_mix : 2;
u32 round_inv : 1; // Blending will invert the value, so rounding needs to go the other way
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/DX11/GSDevice11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
sm.AddMacro("PS_HDR", sel.hdr);
sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
sm.AddMacro("PS_COLCLIP", sel.colclip);
sm.AddMacro("PS_BLEND_A", sel.blend_a);
sm.AddMacro("PS_BLEND_B", sel.blend_b);
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/DX12/GSDevice12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2817,6 +2817,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
sm.AddMacro("PS_HDR", sel.hdr);
sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
sm.AddMacro("PS_COLCLIP", sel.colclip);
sm.AddMacro("PS_BLEND_A", sel.blend_a);
sm.AddMacro("PS_BLEND_B", sel.blend_b);
Expand Down
23 changes: 22 additions & 1 deletion pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4358,6 +4358,9 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
float scale = tex->GetScale();
HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy);

if (tex->m_target && tex->m_from_target && tex->m_target_direct && tex->m_from_target->m_rt_alpha_scale)
m_conf.ps.rta_source_correction = 1;

// Warning: fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
//const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM];
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
Expand Down Expand Up @@ -5265,7 +5268,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
ds->m_alpha_min &= 128;
}
}


// If we correct/decorrect the RT alpha while the source texture is the RT itself, the source's texture reference must be updated as well
const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture;

if (rt)
{
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || std::max(blend_alpha_max, rt->m_alpha_max) > 128 || m_conf.ps.fbmask || m_conf.ps.tex_is_fb;
Expand All @@ -5278,6 +5284,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;

if (req_src_update)
tex->m_texture = rt->m_texture;
}
else if (m_conf.colormask.wa)
{
Expand All @@ -5289,6 +5298,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;

if (req_src_update)
tex->m_texture = rt->m_texture;
}
}
}
Expand All @@ -5298,6 +5310,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;

if (req_src_update)
tex->m_texture = rt->m_texture;
}
}
else if (rt->m_last_draw == s_n)
Expand All @@ -5308,6 +5323,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;

if (req_src_update)
tex->m_texture = rt->m_texture;
}
}

Expand All @@ -5318,6 +5336,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
{
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass, rt);

if (req_src_update && tex->m_texture != rt->m_texture)
tex->m_texture = rt->m_texture;
}
else
{
Expand Down
35 changes: 30 additions & 5 deletions pcsx2/GS/Renderers/HW/GSTextureCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1690,9 +1690,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
}
#endif

if (dst && (GSUtil::GetChannelMask(TEX0.PSM) & 0x8))
dst->RTADecorrect(dst);

src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region);
if (!src) [[unlikely]]
return nullptr;
Expand Down Expand Up @@ -4299,7 +4296,16 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// copy the rt in
const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy()));
if (!area.rempty())
g_gs_device->CopyRect(sTex, dTex, area, 0, 0);
{
if (dst->m_rt_alpha_scale)
{
const GSVector4 sRectF = GSVector4(area) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight());
g_gs_device->StretchRect(
sTex, sRectF, dTex, GSVector4(area), ShaderConvert::RTA_DECORRECTION, false);
}
else
g_gs_device->CopyRect(sTex, dTex, area, 0, 0);
}

src->m_texture = dTex;
src->m_unscaled_size = GSVector2i(tw, th);
Expand All @@ -4317,6 +4323,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_region.SetY(y_offset, region.GetMaxY() + y_offset);
else
src->m_region.SetY(y_offset, y_offset + th);

src->m_target_direct = true;
src->m_texture = dst->m_texture;
src->m_unscaled_size = dst->m_unscaled_size;
src->m_shared_texture = true;
Expand Down Expand Up @@ -4554,6 +4562,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
{
// sample the target directly
src->m_texture = dst->m_texture;
src->m_target_direct = true;
src->m_scale = dst->m_scale;
src->m_unscaled_size = dst->m_unscaled_size;
src->m_shared_texture = true;
Expand Down Expand Up @@ -4600,7 +4609,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con

if (use_texture)
{
g_gs_device->CopyRect(sTex, dTex, sRect, destX, destY);
if (dst->m_rt_alpha_scale)
{
const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight());
g_gs_device->StretchRect(
sTex, sRectF, dTex, GSVector4(destX, destY, sRect.width(), sRect.height()), ShaderConvert::RTA_DECORRECTION, false);
}
else
g_gs_device->CopyRect(sTex, dTex, sRect, destX, destY);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);

#ifdef PCSX2_DEVBUILD
Expand All @@ -4615,12 +4631,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
{
if (is_8bits)
{
if (dst->m_rt_alpha_scale)
{
dst->RTADecorrect(dst);
sTex = dst->m_texture;
}

g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset,
std::max<u32>(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex,
std::max<u32>(TEX0.TBW, 1u) * 64, TEX0.PSM);
}
else
{
if (dst->m_rt_alpha_scale && shader == ShaderConvert::COPY)
shader = ShaderConvert::RTA_DECORRECTION;

const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight());
g_gs_device->StretchRect(
sTex, sRectF, dTex, GSVector4(destX, destY, new_size.x, new_size.y), shader, false);
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/HW/GSTextureCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ class GSTextureCache
u8 m_valid_hashes = 0;
u8 m_complete_layers = 0;
bool m_target = false;
bool m_target_direct = false;
bool m_repeating = false;
std::pair<u8, u8> m_alpha_minmax = {0u, 255u};
std::vector<GSVector2i>* m_p2t = nullptr;
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
Original file line number Diff line number Diff line change
Expand Up @@ -1821,6 +1821,7 @@ static GSMTLExpandType ConvertVSExpand(GSHWDrawConfig::VSExpand generic)
setFnConstantB(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED);
setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR);
setFnConstantB(m_fn_constants, pssel.rta_correction, GSMTLConstantIndex_PS_RTA_CORRECTION);
setFnConstantB(m_fn_constants, pssel.rta_source_correction, GSMTLConstantIndex_PS_RTA_SRC_CORRECTION);
setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP);
setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX);
setFnConstantB(m_fn_constants, pssel.round_inv, GSMTLConstantIndex_PS_ROUND_INV);
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_A_MASKED,
GSMTLConstantIndex_PS_HDR,
GSMTLConstantIndex_PS_RTA_CORRECTION,
GSMTLConstantIndex_PS_RTA_SRC_CORRECTION,
GSMTLConstantIndex_PS_COLCLIP,
GSMTLConstantIndex_PS_BLEND_MIX,
GSMTLConstantIndex_PS_ROUND_INV,
Expand Down
Loading

0 comments on commit b77947c

Please sign in to comment.