From 74fd9132ee41857b06ddd054e634eda0b4a628f5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 23 Jun 2024 07:10:42 +1000 Subject: [PATCH 1/3] GS/HW: Allow use of trilinear with shader sampling --- bin/resources/shaders/dx11/tfx.fx | 9 +++-- bin/resources/shaders/opengl/tfx_fs.glsl | 9 +++-- bin/resources/shaders/vulkan/tfx.glsl | 9 +++-- pcsx2/GS/Renderers/Common/GSDevice.h | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 39 +++++++++++--------- pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h | 1 + pcsx2/GS/Renderers/Metal/tfx.metal | 8 ++-- pcsx2/ShaderCacheVersion.h | 2 +- 8 files changed, 43 insertions(+), 35 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 644bec937a88f..012507b884d7c 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -162,6 +162,7 @@ cbuffer cb1 uint4 FbMask; float4 HalfTexel; float4 MinMax; + float4 LODParams; float4 STRange; int4 ChannelShuffle; float2 TC_OffsetHack; @@ -207,10 +208,10 @@ float4 sample_c(float2 uv, float uv_w) return Texture.Sample(TextureSampler, uv); #elif PS_MANUAL_LOD == 1 // FIXME add LOD: K - ( LOG2(Q) * (1 << L)) - float K = MinMax.x; - float L = MinMax.y; - float bias = MinMax.z; - float max_lod = MinMax.w; + float K = LODParams.x; + float L = LODParams.y; + float bias = LODParams.z; + float max_lod = LODParams.w; float gs_lod = K - log2(abs(uv_w)) * L; // FIXME max useful ? diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 28e65b632c344..1cd64ab08c8c5 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -44,6 +44,7 @@ layout(std140, binding = 0) uniform cb21 vec4 HalfTexel; vec4 MinMax; + vec4 LODParams; vec4 STRange; ivec4 ChannelShuffle; @@ -159,10 +160,10 @@ vec4 sample_c(vec2 uv) return texture(TextureSampler, uv); #elif PS_MANUAL_LOD == 1 // FIXME add LOD: K - ( LOG2(Q) * (1 << L)) - float K = MinMax.x; - float L = MinMax.y; - float bias = MinMax.z; - float max_lod = MinMax.w; + float K = LODParams.x; + float L = LODParams.y; + float bias = LODParams.z; + float max_lod = LODParams.w; float gs_lod = K - log2(abs(PSin.t_float.w)) * L; // FIXME max useful ? diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 40efb1cea7231..5363f07745db6 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -316,6 +316,7 @@ layout(std140, set = 0, binding = 1) uniform cb1 uvec4 FbMask; vec4 HalfTexel; vec4 MinMax; + vec4 LODParams; vec4 STRange; ivec4 ChannelShuffle; vec2 TC_OffsetHack; @@ -399,10 +400,10 @@ vec4 sample_c(vec2 uv) return texture(Texture, uv); #elif PS_MANUAL_LOD == 1 // FIXME add LOD: K - ( LOG2(Q) * (1 << L)) - float K = MinMax.x; - float L = MinMax.y; - float bias = MinMax.z; - float max_lod = MinMax.w; + float K = LODParams.x; + float L = LODParams.y; + float bias = LODParams.z; + float max_lod = LODParams.w; float gs_lod = K - log2(abs(vsIn.t.w)) * L; // FIXME max useful ? diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 6951b39436089..eb563679fb6b6 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -585,6 +585,7 @@ struct alignas(16) GSHWDrawConfig GSVector4 HalfTexel; GSVector4 MinMax; + GSVector4 LODParams; GSVector4 STRange; GSVector4i ChannelShuffle; GSVector2 TCOffsetHack; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 9957b32d89977..a337a8dd7712d 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -4739,7 +4739,6 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT __ri static constexpr bool IsRedundantClamp(u8 clamp, u32 clamp_min, u32 clamp_max, u32 tsize) { // Don't shader sample when the clamp/repeat is configured to the texture size. - // That way trilinear etc still works. const u32 textent = (1u << tsize) - 1u; if (clamp == CLAMP_REGION_CLAMP) return (clamp_min == 0 && clamp_max >= textent); @@ -4805,6 +4804,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, const bool need_mipmap = IsMipMapDraw(); const bool shader_emulated_sampler = tex->m_palette || (tex->m_target && !m_conf.ps.shuffle && cpsm.fmt != 0) || complex_wms_wmt || psm.depth || target_region; + const bool can_trilinear = !tex->m_palette && !tex->m_target && !m_conf.ps.shuffle; const bool trilinear_manual = need_mipmap && GSConfig.HWMipmap; bool bilinear = m_vt.IsLinear(); @@ -4817,8 +4817,11 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, // Force bilinear otherwise we can end up with min/mag nearest and mip linear. // We don't need to check for HWMipmapLevel::Off here, because forced trilinear implies forced mipmaps. bilinear = true; - trilinear = static_cast(GS_MIN_FILTER::Linear_Mipmap_Linear); - trilinear_auto = !tex->m_target && (!need_mipmap || !GSConfig.HWMipmap); + if (can_trilinear) + { + trilinear = static_cast(GS_MIN_FILTER::Linear_Mipmap_Linear); + trilinear_auto = !tex->m_target && (!need_mipmap || !GSConfig.HWMipmap); + } } break; @@ -4826,7 +4829,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, case TriFiltering::Automatic: { // Can only use PS2 trilinear when mipmapping is enabled. - if (need_mipmap && GSConfig.HWMipmap) + if (need_mipmap && GSConfig.HWMipmap && can_trilinear) { trilinear = m_context->TEX1.MMIN; trilinear_auto = !tex->m_target && !GSConfig.HWMipmap; @@ -5033,17 +5036,19 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, m_conf.cb_ps.MinMax.w = (wmt == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.w : region_repeat.w; } } - else if (trilinear_manual) + + if (trilinear_manual) { - // Reuse uv_min_max for mipmap parameter to avoid an extension of the UBO - m_conf.cb_ps.MinMax.x = static_cast(m_context->TEX1.K) / 16.0f; - m_conf.cb_ps.MinMax.y = static_cast(1 << m_context->TEX1.L); - m_conf.cb_ps.MinMax.z = static_cast(m_lod.x); // Offset because first layer is m_lod, dunno if we can do better - m_conf.cb_ps.MinMax.w = static_cast(m_lod.y); + m_conf.cb_ps.LODParams.x = static_cast(m_context->TEX1.K) / 16.0f; + m_conf.cb_ps.LODParams.y = static_cast(1 << m_context->TEX1.L); + m_conf.cb_ps.LODParams.z = static_cast(m_lod.x); // Offset because first layer is m_lod, dunno if we can do better + m_conf.cb_ps.LODParams.w = static_cast(m_lod.y); + m_conf.ps.manual_lod = 1; } else if (trilinear_auto) { tex->m_texture->GenerateMipmapsIfNeeded(); + m_conf.ps.automatic_lod = 1; } // TC Offset Hack @@ -5059,7 +5064,11 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, { m_conf.sampler.biln = 0; m_conf.sampler.aniso = 0; - m_conf.sampler.triln = 0; + + // Remove linear from trilinear, since we're doing the bilinear in the shader, and we only want this for mip selection. + m_conf.sampler.triln = (trilinear >= static_cast(GS_MIN_FILTER::Linear_Mipmap_Nearest)) ? + (trilinear - static_cast(GS_MIN_FILTER::Nearest_Mipmap_Nearest)) : + 0; } else { @@ -5069,14 +5078,8 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, const bool anisotropic = m_vt.m_primclass == GS_TRIANGLE_CLASS && !trilinear_manual; m_conf.sampler.aniso = anisotropic; m_conf.sampler.triln = trilinear; - if (trilinear_manual) - { - m_conf.ps.manual_lod = 1; - } - else if (trilinear_auto || anisotropic) - { + if (anisotropic && !trilinear_manual) m_conf.ps.automatic_lod = 1; - } } // clamp to base level if we're not providing or generating mipmaps diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index 8fa177776dca9..851ef0f091981 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -111,6 +111,7 @@ struct GSMTLMainPSUniform vector_float4 uv_min_max; vector_uint4 uv_msk_fix; }; + vector_float4 lod_params; vector_float4 st_range; struct { diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 56a7941502c61..55729ecfeef48 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -356,10 +356,10 @@ struct PSMain } else if (PS_MANUAL_LOD) { - float K = cb.uv_min_max.x; - float L = cb.uv_min_max.y; - float bias = cb.uv_min_max.z; - float max_lod = cb.uv_min_max.w; + float K = cb.lod_params.x; + float L = cb.lod_params.y; + float bias = cb.lod_params.z; + float max_lod = cb.lod_params.w; float gs_lod = K - log2(abs(in.t.w)) * L; // FIXME max useful ? diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 23060639d17d8..4f712fe342baa 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 50; +static constexpr u32 SHADER_CACHE_VERSION = 51; From 21f5008b83b79a3941ad5b1866a6bb87b7a631d3 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 23 Jun 2024 08:52:42 +1000 Subject: [PATCH 2/3] GS/HW: Better eliminate redundant clamps with mipmapped draws TW/TH won't match when mipmap minimization eliminates the base level. Use the TEX0 register from the context instead. --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index a337a8dd7712d..c5d243360248b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -4789,9 +4789,9 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, // we keep the shader clamp. See #5851 on github, and the note in Draw(). [[maybe_unused]] static constexpr const char* clamp_modes[] = {"REPEAT", "CLAMP", "REGION_CLAMP", "REGION_REPEAT"}; const bool redundant_wms = IsRedundantClamp(m_cached_ctx.CLAMP.WMS, m_cached_ctx.CLAMP.MINU, - m_cached_ctx.CLAMP.MAXU, tex->m_TEX0.TW); + m_cached_ctx.CLAMP.MAXU, m_cached_ctx.TEX0.TW); const bool redundant_wmt = IsRedundantClamp(m_cached_ctx.CLAMP.WMT, m_cached_ctx.CLAMP.MINV, - m_cached_ctx.CLAMP.MAXV, tex->m_TEX0.TH); + m_cached_ctx.CLAMP.MAXV, m_cached_ctx.TEX0.TH); const u8 wms = EffectiveClamp(m_cached_ctx.CLAMP.WMS, !tex->m_target && (source_region.HasX() || redundant_wms)); const u8 wmt = EffectiveClamp(m_cached_ctx.CLAMP.WMT, !tex->m_target && (source_region.HasY() || redundant_wmt)); const bool complex_wms_wmt = !!((wms | wmt) & 2) || target_region; From 09bc0bdde646f27a325a7c8377c10d4f05235172 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 23 Jun 2024 11:26:43 +1000 Subject: [PATCH 3/3] GS/HW: Ensure mipmaps aren't generated on hash cache textures --- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index fc96e3bf526ff..3646ef1fc925f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -5366,8 +5366,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 if (lod) { const int basemip = lod->x; - const int nmips = lod->y - lod->x + 1; - for (int mip = 1; mip < nmips; mip++) + for (int mip = 1; mip < tlevels; mip++) { const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)}; std::pair mip_alpha_minmax; @@ -5379,6 +5378,8 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 alpha_minmax.second = std::max(alpha_minmax.second, mip_alpha_minmax.second); } } + + tex->ClearMipmapGenerationFlag(); } // remove the palette hash when using paltex/indexed