From 27d71f530abe3ca3906de7293c69cf72376c7a2e Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 25 Oct 2023 20:34:40 +0100 Subject: [PATCH] GS/HW: Disable texture when not required --- pcsx2/GS/GSRegs.h | 1 + pcsx2/GS/Renderers/Common/GSRenderer.h | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 115 +++++++++++++++---------- 3 files changed, 71 insertions(+), 46 deletions(-) diff --git a/pcsx2/GS/GSRegs.h b/pcsx2/GS/GSRegs.h index 352ed39ba008f..9d6b45977e0b6 100644 --- a/pcsx2/GS/GSRegs.h +++ b/pcsx2/GS/GSRegs.h @@ -545,6 +545,7 @@ REG_END2 // output will be Cd, Cs is discarded __forceinline bool IsCdOutput() const { return (C == 2 && D != 1 && FIX == 0x00); } __forceinline bool IsUsingCs() const { return (A == 0 || B == 0 || D == 0); } + __forceinline bool IsUsingAs() const { return (A != B && C == 0); } __forceinline bool IsBlack() const { return ((C == 2 && FIX == 0) || (A == 2 && A == B)) && D == 2; } REG_END2 diff --git a/pcsx2/GS/Renderers/Common/GSRenderer.h b/pcsx2/GS/Renderers/Common/GSRenderer.h index 2b96d58395fa9..80f0932bdb3b9 100644 --- a/pcsx2/GS/Renderers/Common/GSRenderer.h +++ b/pcsx2/GS/Renderers/Common/GSRenderer.h @@ -39,6 +39,7 @@ class GSRenderer : public GSState protected: GSVector2i m_real_size{0, 0}; bool m_texture_shuffle = false; + bool m_process_texture = false; bool m_copy_16bit_to_target_shuffle = false; bool m_same_group_texture_shuffle = false; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 934671995fe43..fb707167e6391 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1899,18 +1899,22 @@ void GSRendererHW::Draw() DetectDoubleHalfClear(no_rt, no_ds); } - const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC); + m_process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC); + const bool not_writing_to_all = (!PrimitiveCoversWithoutGaps() || AreAnyPixelsDiscarded() || !all_depth_tests_pass); + bool preserve_depth = + not_writing_to_all || (!no_ds && (!all_depth_tests_pass || !m_cached_ctx.DepthWrite() || m_cached_ctx.TEST.ATE)); + const u32 frame_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); - const bool tex_is_rt = (process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() && + + // This is a first pass, but it could be disabled further down. + bool tex_is_rt = (m_process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() && m_cached_ctx.TEX0.TBP0 < frame_end_bp); - const bool not_writing_to_all = (!PrimitiveCoversWithoutGaps() || AreAnyPixelsDiscarded() || !all_depth_tests_pass); - const bool preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt)); - const bool preserve_rt_alpha = + bool preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt)); + bool preserve_rt_alpha = (!no_rt && (!IsDiscardingDstAlpha() || not_writing_to_all || (tex_is_rt && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp != 24))); bool preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha; - bool preserve_depth = - not_writing_to_all || (!no_ds && (!all_depth_tests_pass || !m_cached_ctx.DepthWrite() || m_cached_ctx.TEST.ATE)); + // SW CLUT Render enable. bool force_preload = GSConfig.PreloadFrameWithGSData; @@ -2082,7 +2086,7 @@ void GSRendererHW::Draw() TextureMinMaxResult tmm; // Disable texture mapping if the blend is black and using alpha from vertex. - if (process_texture) + if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; GSVector2i hash_lod_range(0, 0); @@ -2216,42 +2220,50 @@ void GSRendererHW::Draw() const bool possible_shuffle = ((rt_32bit && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) || IsPossibleChannelShuffle(); const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && m_context->ALPHA.C == 0 && m_env.TEXA.AEM; const bool req_color = (!PRIM->ABE || (PRIM->ABE && (m_context->ALPHA.IsUsingCs() || need_aem_color))) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF)); - const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && m_context->TEX0.TCC && ((m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000))); - - src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) : - g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr, - possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha); + const bool alpha_used = m_context->TEX0.TCC && ((PRIM->ABE && m_context->ALPHA.IsUsingAs()) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000))); + const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && alpha_used; - if (unlikely(!src)) + // TODO: Be able to send an alpha of 1.0 (blended with vertex alpha maybe?) so we can avoid sending the texture, since we don't always need it. + // Example games: Evolution Snowboarding, Final Fantasy Dirge of Cerberus, Red Dead Revolver, Stuntman, Tony Hawk's Underground 2, Ultimate Spider-Man. + if (!req_color && !alpha_used) + m_process_texture = false; + else { - GL_INS("ERROR: Source lookup failed, skipping."); - CleanupDraw(true); - return; - } + src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) : + g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr, + possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha); - // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. - // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, - // recompute it, and everything derived from it again if it changes. - if (GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0) - { - CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second); + if (unlikely(!src)) + { + GL_INS("ERROR: Source lookup failed, skipping."); + CleanupDraw(true); + return; + } - u32 new_fm = m_context->FRAME.FBMSK; - u32 new_zm = m_context->ZBUF.ZMSK || m_context->TEST.ZTE == 0 ? 0xffffffff : 0; - if (m_cached_ctx.TEST.ATE && GSRenderer::TryAlphaTest(new_fm, new_zm)) + // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. + // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, + // recompute it, and everything derived from it again if it changes. + if (GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0) { - m_cached_ctx.TEST.ATE = false; - m_cached_ctx.FRAME.FBMSK = new_fm; - m_cached_ctx.ZBUF.ZMSK = (new_zm != 0); - fm = new_fm; - zm = new_zm; - no_rt = no_rt || (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk); - no_ds = no_ds || (zm != 0 && all_depth_tests_pass); - if (no_rt && no_ds) + CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second); + + u32 new_fm = m_context->FRAME.FBMSK; + u32 new_zm = m_context->ZBUF.ZMSK || m_context->TEST.ZTE == 0 ? 0xffffffff : 0; + if (m_cached_ctx.TEST.ATE && GSRenderer::TryAlphaTest(new_fm, new_zm)) { - GL_INS("Late draw cancel because no pixels pass alpha test."); - CleanupDraw(true); - return; + m_cached_ctx.TEST.ATE = false; + m_cached_ctx.FRAME.FBMSK = new_fm; + m_cached_ctx.ZBUF.ZMSK = (new_zm != 0); + fm = new_fm; + zm = new_zm; + no_rt = no_rt || (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk); + no_ds = no_ds || (zm != 0 && all_depth_tests_pass); + if (no_rt && no_ds) + { + GL_INS("Late draw cancel because no pixels pass alpha test."); + CleanupDraw(true); + return; + } } } } @@ -2279,6 +2291,17 @@ void GSRendererHW::Draw() target_scale = 1.0f; } + if (!m_process_texture && tex_is_rt) + { + tex_is_rt = (m_process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() && + m_cached_ctx.TEX0.TBP0 < frame_end_bp); + preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt)); + preserve_rt_alpha = + (!no_rt && (!IsDiscardingDstAlpha() || not_writing_to_all || + (tex_is_rt && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp != 24))); + preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha; + } + GSTextureCache::Target* rt = nullptr; GIFRegTEX0 FRAME_TEX0; if (!no_rt) @@ -2354,7 +2377,7 @@ void GSRendererHW::Draw() } } - if (process_texture) + if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; const GSVertex* v = &m_vertex.buff[0]; @@ -2939,7 +2962,7 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy) { GL_PUSH("IA"); - if (GSConfig.UserHacks_WildHack && !m_isPackedUV_HackFlag && PRIM->TME && PRIM->FST) + if (GSConfig.UserHacks_WildHack && !m_isPackedUV_HackFlag && m_process_texture && PRIM->FST) { for (u32 i = 0; i < m_vertex.next; i++) m_vertex.buff[i].UV &= 0x3FEF3FEF; @@ -5089,7 +5112,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // vs - m_conf.vs.tme = PRIM->TME; + m_conf.vs.tme = m_process_texture; m_conf.vs.fst = PRIM->FST; // FIXME D3D11 and GL support half pixel center. Code could be easier!!! @@ -5431,15 +5454,15 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw() // Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway // what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked. if ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.FRAME.Block()) || - (PRIM->TME && ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2))) + (m_process_texture && ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2))) return CLUTDrawTestResult::NotCLUTDraw; // Ignore large render targets, make sure it's staying in page width. - if (PRIM->TME && (m_cached_ctx.FRAME.FBW != 1 && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW)) + if (m_process_texture && (m_cached_ctx.FRAME.FBW != 1 && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW)) return CLUTDrawTestResult::NotCLUTDraw; // Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle. - if (PRIM->TME && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0) + if (m_process_texture && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0) return CLUTDrawTestResult::NotCLUTDraw; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; @@ -5481,7 +5504,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw() if (!valid_size) return CLUTDrawTestResult::NotCLUTDraw; - if (PRIM->TME) + if (m_process_texture) { // If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need. const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage; @@ -6081,7 +6104,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) + if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) { GL_PUSH("OI_BlitFMV");