From fce8317da7cc876133f3257f7bbc11d2b73276ec Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 1 Apr 2024 01:12:24 +1000 Subject: [PATCH] GS/HW: Compute source alpha min/max based on texture instead of CLUT Stops alpha from unused CLUT colours from leaking into the alpha range, saves a good number of draw calls on many games, and some RTA conversions. --- pcsx2/GS/GSState.cpp | 18 ++++++--- pcsx2/GS/GSState.h | 4 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 2 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 51 +++++++++++++++--------- pcsx2/GS/Renderers/HW/GSTextureCache.h | 4 +- 5 files changed, 52 insertions(+), 27 deletions(-) diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 2e825c05002a7..861bfe7e64353 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3916,17 +3916,25 @@ void GSState::CalcAlphaMinMax(const int tex_alpha_min, const int tex_alpha_max) case 1: // If we're using the alpha from the texture, not the whole range, we can just use tex_alpha_min/max. // AEM and TA0 re precomputed with GSBlock::ReadAndExpandBlock24, so already worked out for tex_alpha. - a.y = (tex_alpha_max < 500) ? min : (env.TEXA.AEM ? 0 : env.TEXA.TA0); - a.w = (tex_alpha_max < 500) ? max : env.TEXA.TA0; + a.y = (tex_alpha_max < INVALID_ALPHA_MINMAX) ? min : (env.TEXA.AEM ? 0 : env.TEXA.TA0); + a.w = (tex_alpha_max < INVALID_ALPHA_MINMAX) ? max : env.TEXA.TA0; break; case 2: // If we're using the alpha from the texture, not the whole range, we can just use tex_alpha_min/max. // AEM, TA0 and TA1 are precomputed with GSBlock::ReadAndExpandBlock16, so already worked out for tex_alpha. - a.y = (tex_alpha_max < 500) ? min : (env.TEXA.AEM ? 0 : std::min(env.TEXA.TA0, env.TEXA.TA1)); - a.w = (tex_alpha_max < 500) ? max : std::max(env.TEXA.TA0, env.TEXA.TA1); + a.y = (tex_alpha_max < INVALID_ALPHA_MINMAX) ? min : (env.TEXA.AEM ? 0 : std::min(env.TEXA.TA0, env.TEXA.TA1)); + a.w = (tex_alpha_max < INVALID_ALPHA_MINMAX) ? max : std::max(env.TEXA.TA0, env.TEXA.TA1); break; case 3: - m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); + if (tex_alpha_max < INVALID_ALPHA_MINMAX) + { + a.y = min; + a.w = max; + } + else + { + m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); + } break; default: ASSUME(0); diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 7496a277267c9..49bc5cf5961c4 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -122,6 +122,8 @@ class GSState : public GSAlignedClass<32> } m_tr; protected: + static constexpr int INVALID_ALPHA_MINMAX = 500; + GSVertex m_v = {}; float m_q = 1.0f; GSVector4i m_scissor_cull_min = {}; @@ -163,7 +165,7 @@ class GSState : public GSAlignedClass<32> GSVertexTrace::VertexAlpha& GetAlphaMinMax() { if (!m_vt.m_alpha.valid) - CalcAlphaMinMax(0, 500); + CalcAlphaMinMax(0, INVALID_ALPHA_MINMAX); return m_vt.m_alpha; } struct TextureMinMaxResult diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 321459490744b..0fecc8a095182 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2489,7 +2489,7 @@ void GSRendererHW::Draw() // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. - if (GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0) + if (src->m_valid_alpha_minmax) { CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index bd3e05ff29430..044ef0db6938f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -993,7 +993,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c if (palette) { - AttachPaletteToSource(src, psm_s.pal, true); + AttachPaletteToSource(src, psm_s.pal, true, true); } } else @@ -1645,7 +1645,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (TEX0.PSM == PSMT8H) { // Attach palette for GPU texture conversion - AttachPaletteToSource(src, psm_s.pal, true); + AttachPaletteToSource(src, psm_s.pal, true, true); } return src; @@ -1666,7 +1666,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (TEX0.PSM == PSMT8H) { // Attach palette for GPU texture conversion - AttachPaletteToSource(src, psm_s.pal, true); + AttachPaletteToSource(src, psm_s.pal, true, true); } return src; @@ -1715,6 +1715,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Guard against merged targets which don't actually link. if (src->m_target && src->m_from_target) { + src->m_valid_alpha_minmax = true; + if ((src->m_TEX0.PSM & 0xf) == PSMCT24) { src->m_alpha_minmax.first = TEXA.AEM ? 0 : TEXA.TA0; @@ -1739,7 +1741,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (gpu_clut) AttachPaletteToSource(src, gpu_clut); else if (src->m_palette && (!src->m_palette_obj || !src->ClutMatch({clut, psm_s.pal}))) - AttachPaletteToSource(src, psm_s.pal, true); + AttachPaletteToSource(src, psm_s.pal, true, true); } src->Update(r); @@ -4375,6 +4377,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_from_target = dst; src->m_from_target_TEX0 = dst->m_TEX0; + src->m_valid_alpha_minmax = true; if ((src->m_TEX0.PSM & 0xf) == PSMCT24) { src->m_alpha_minmax.first = TEXA.AEM ? 0 : TEXA.TA0; @@ -4399,7 +4402,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (psm.pal > 0) { // Attach palette for GPU texture conversion - AttachPaletteToSource(src, psm.pal, true); + AttachPaletteToSource(src, psm.pal, true, true); } #ifdef PCSX2_DEVBUILD @@ -4455,6 +4458,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Do this first as we could be adding in alpha from an upgraded 24bit target. dst->Update(); + src->m_valid_alpha_minmax = true; if ((src->m_TEX0.PSM & 0xf) == PSMCT24) { src->m_alpha_minmax.first = TEXA.AEM ? 0 : TEXA.TA0; @@ -4710,7 +4714,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Because the texture is already on the GPU, CPU can't convert it. if (psm.pal > 0) { - AttachPaletteToSource(src, psm.pal, true); + AttachPaletteToSource(src, psm.pal, true, true); } // Offset hack. Can be enabled via GS options. @@ -4742,11 +4746,12 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con { src->m_texture = src->m_from_hash_cache->texture; src->m_alpha_minmax = src->m_from_hash_cache->alpha_minmax; + src->m_valid_alpha_minmax = src->m_from_hash_cache->valid_alpha_minmax; if (gpu_clut) AttachPaletteToSource(src, gpu_clut); else if (psm.pal > 0) - AttachPaletteToSource(src, psm.pal, paltex); + AttachPaletteToSource(src, psm.pal, paltex, false); } else if (paltex) { @@ -4762,7 +4767,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (gpu_clut) AttachPaletteToSource(src, gpu_clut); else - AttachPaletteToSource(src, psm.pal, true); + AttachPaletteToSource(src, psm.pal, true, true); } else { @@ -4778,7 +4783,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (gpu_clut) AttachPaletteToSource(src, gpu_clut); else if (psm.pal > 0) - AttachPaletteToSource(src, psm.pal, false); + AttachPaletteToSource(src, psm.pal, false, true); } #ifdef PCSX2_DEVBUILD @@ -5231,10 +5236,12 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 return nullptr; } - // upload base level - const bool is_direct = (GSLocalMemory::m_psm[TEX0.PSM].pal == 0); + // compute alpha minmax on all textures, unless paltex is on, because not all CLUT colors are used. + const bool compute_alpha_minmax = !paltex; std::pair alpha_minmax = {0u, 255u}; - PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0, is_direct ? &alpha_minmax : nullptr); + + // upload base level + PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0, compute_alpha_minmax ? &alpha_minmax : nullptr); // upload mips if present if (lod) @@ -5246,8 +5253,8 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)}; std::pair mip_alpha_minmax; PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip, - is_direct ? &mip_alpha_minmax : nullptr); - if (!is_direct) + compute_alpha_minmax ? &mip_alpha_minmax : nullptr); + if (compute_alpha_minmax) { alpha_minmax.first = std::min(alpha_minmax.first, mip_alpha_minmax.first); alpha_minmax.second = std::max(alpha_minmax.second, mip_alpha_minmax.second); @@ -5260,7 +5267,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 key.RemoveCLUTHash(); // insert into the cache cache, and we're done - const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, false}; + const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, compute_alpha_minmax, false}; m_hash_cache_memory_usage += tex->GetMemUsage(); return &m_hash_cache.emplace(key, entry).first->second; } @@ -5916,7 +5923,7 @@ void GSTextureCache::Source::PreloadLevel(int level) m_layer_hash[level] = hash; // And upload the texture. - if (IsPaletteFormat()) + if (!m_valid_alpha_minmax) { PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level, nullptr); @@ -6435,11 +6442,15 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) delete s; } -void GSTextureCache::AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture) +void GSTextureCache::AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture, bool update_alpha_minmax) { s->m_palette_obj = m_palette_map.LookupPalette(pal, need_gs_texture); s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr; - s->m_alpha_minmax = s->m_palette_obj->GetAlphaMinMax(); + if (update_alpha_minmax) + { + s->m_alpha_minmax = s->m_palette_obj->GetAlphaMinMax(); + s->m_valid_alpha_minmax = true; + } } void GSTextureCache::AttachPaletteToSource(Source* s, GSTexture* gpu_clut) @@ -6448,6 +6459,7 @@ void GSTextureCache::AttachPaletteToSource(Source* s, GSTexture* gpu_clut) s->m_palette = gpu_clut; // Unknown. + s->m_valid_alpha_minmax = false; s->m_alpha_minmax.first = 0; s->m_alpha_minmax.second = 255; } @@ -6657,7 +6669,7 @@ void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* { // We must've got evicted before we finished loading. No matter, add it in there anyway; // if it's not used again, it'll get tossed out later. - const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, true}; + const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, true, true}; m_hash_cache.emplace(key, entry); return; } @@ -6665,6 +6677,7 @@ void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* // Reset age so we don't get thrown out too early. it->second.age = 0; it->second.alpha_minmax = alpha_minmax; + it->second.valid_alpha_minmax = true; // Update memory usage, swap the textures, and recycle the old one for reuse. if (!it->second.is_replacement) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 37c31e3d9322e..f58b79b060667 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -116,6 +116,7 @@ class GSTextureCache u32 refcount; u16 age; std::pair alpha_minmax; + bool valid_alpha_minmax; bool is_replacement; }; @@ -286,6 +287,7 @@ class GSTextureCache bool m_target = false; bool m_target_direct = false; bool m_repeating = false; + bool m_valid_alpha_minmax = false; std::pair m_alpha_minmax = {0u, 255u}; std::vector* m_p2t = nullptr; // Keep a trace of the target origin. There is no guarantee that pointer will @@ -537,7 +539,7 @@ class GSTextureCache return (type == DepthStencil) ? "Depth" : "Color"; } - void AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture); + void AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture, bool update_alpha_minmax); void AttachPaletteToSource(Source* s, GSTexture* gpu_clut); SurfaceOffset ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t); SurfaceOffset ComputeSurfaceOffset(const uint32_t bp, const uint32_t bw, const uint32_t psm, const GSVector4i& r, const Target* t);