Skip to content

Commit

Permalink
GS: Improve optimizing scissoring texture when REPEAT sampling
Browse files Browse the repository at this point in the history
  • Loading branch information
refractionpcsx2 committed Apr 1, 2024
1 parent 8dce187 commit f3a75f5
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 52 deletions.
87 changes: 46 additions & 41 deletions pcsx2/GS/GSState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3643,7 +3643,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i
return sets_bits || clears_bits;
}

GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize)
GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps)
{
// TODO: some of the +1s can be removed if linear == false

Expand Down Expand Up @@ -3752,7 +3752,7 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL

// Adjust texture range when sprites get scissor clipped. Since we linearly interpolate, this
// optimization doesn't work when perspective correction is enabled.
if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && m_index.tail < 3)
if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && no_gaps)
{
// When coordinates are fractional, GS appears to draw to the right/bottom (effectively
// taking the ceiling), not to the top/left (taking the floor).
Expand All @@ -3768,50 +3768,55 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]];
const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]];

GSVector4 new_st = st;
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap
const bool u_forward = vert_first->U < vert_second->U;
const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X;
const bool swap_x = u_forward != x_forward;

if (int_rc.left < scissored_rc.left)
{
if (!swap_x)
new_st.x += floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
else
new_st.z -= floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
}
if (int_rc.right > scissored_rc.right)
{
if (!swap_x)
new_st.z -= floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
else
new_st.x += floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
}
// we need to check that it's not going to repeat over the non-clipped part
if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast<int>(st.x) & ~tw_mask) == (static_cast<int>(st.z) & ~tw_mask)))
if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast<int>(new_st.x) & ~tw_mask) == (static_cast<int>(new_st.z) & ~tw_mask)))
{
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap
const bool u_forward = vert_first->U < vert_second->U;
const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X;
const bool swap_x = u_forward != x_forward;
st.x = new_st.x;
st.z = new_st.z;
}

if (int_rc.left < scissored_rc.left)
{
if(!swap_x)
st.x += floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
else
st.z -= floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
}
if (int_rc.right > scissored_rc.right)
{
if (!swap_x)
st.z -= floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
else
st.x += floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
}
const bool v_forward = vert_first->V < vert_second->V;
const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y;
const bool swap_y = v_forward != y_forward;

if (int_rc.top < scissored_rc.top)
{
if (!swap_y)
new_st.y += floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
else
new_st.w -= floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
}
if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast<int>(st.y) & ~th_mask) == (static_cast<int>(st.w) & ~th_mask)))
if (int_rc.bottom > scissored_rc.bottom)
{
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap
const bool v_forward = vert_first->V < vert_second->V;
const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y;
const bool swap_y = v_forward != y_forward;

if (int_rc.top < scissored_rc.top)
{
if (!swap_y)
st.y += floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
else
st.w -= floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
}
if (int_rc.bottom > scissored_rc.bottom)
{
if (!swap_y)
st.w -= floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
else
st.y += floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
}
if (!swap_y)
new_st.w -= floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
else
new_st.y += floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
}
if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast<int>(new_st.y) & ~th_mask) == (static_cast<int>(new_st.w) & ~th_mask)))
{
st.y = new_st.y;
st.w = new_st.w;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion pcsx2/GS/GSState.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class GSState : public GSAlignedClass<32>
GSVector4i coverage; ///< Part of the texture used
u8 uses_boundary; ///< Whether or not the usage touches the left, top, right, or bottom edge (and therefore needs wrap modes preserved)
};
TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize);
TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps);
bool TryAlphaTest(u32& fm, u32& zm);
bool IsOpaque();
bool IsMipMapDraw();
Expand Down
13 changes: 7 additions & 6 deletions pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2100,6 +2100,7 @@ void GSRendererHW::Draw()
// --------------------------------------
m_r = GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p) + GSVector4::cxpr(0.5f));
m_r = m_r.blend8(m_r + GSVector4i::cxpr(0, 0, 1, 1), (m_r.xyxy() == m_r.zwzw()));
m_r_no_scissor = m_r;
m_r = m_r.rintersect(context->scissor.in);

// Draw is too small, just skip it.
Expand Down Expand Up @@ -2404,7 +2405,7 @@ void GSRendererHW::Draw()
TEX0 = m_cached_ctx.TEX0;
}

tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false);
tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps);

// Snowblind games set TW/TH to 1024, and use UVs for smaller textures inside that.
// Such textures usually contain junk in local memory, so try to make them smaller based on UVs.
Expand Down Expand Up @@ -2795,7 +2796,7 @@ void GSRendererHW::Draw()
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;

tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false);
tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps);

src->UpdateLayer(MIP_TEX0, tmm.coverage, layer - m_lod.x);
}
Expand Down Expand Up @@ -5947,7 +5948,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
if (m_process_texture)
{
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage;
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage;

// If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download.
if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
Expand Down Expand Up @@ -6060,7 +6061,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
// If the EE has written over our sample area, we're fine to do this on the CPU, despite the target.
if (!src_target->m_dirty.empty())
{
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage);
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage);
for (GSDirtyRect& rc : src_target->m_dirty)
{
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty())
Expand Down Expand Up @@ -6773,7 +6774,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps()
const int first_dpX = v[1].XYZ.X - v[0].XYZ.X;

// Horizontal Match.
if ((first_dpX >> 4) == m_r.z)
if ((first_dpX >> 4) == m_r_no_scissor.z)
{
// Borrowed from MergeSprite() modified to calculate heights.
for (u32 i = 2; i < m_vertex.next; i += 2)
Expand All @@ -6792,7 +6793,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps()
}

// Vertical Match.
if ((first_dpY >> 4) == m_r.w)
if ((first_dpY >> 4) == m_r_no_scissor.w)
{
// Borrowed from MergeSprite().
const int offset_X = m_context->XYOFFSET.OFX;
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/HW/GSRendererHW.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ class GSRendererHW : public GSRenderer
bool IsUsingAsInBlend();

GSVector4i m_r = {};
GSVector4i m_r_no_scissor = {};

// We modify some of the context registers to optimize away unnecessary operations.
// Instead of messing with the real context, we copy them and use those instead.
Expand Down
4 changes: 2 additions & 2 deletions pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b

GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), mipmap);

const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage;

if (!hw.m_sw_texture[0])
hw.m_sw_texture[0] = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
Expand Down Expand Up @@ -287,7 +287,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b
else
hw.m_sw_texture[i]->Reset(gd.sel.tw + 3, MIP_TEX0, env.TEXA);

GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage;
GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage;
hw.m_sw_texture[i]->Update(r);
gd.tex[i] = hw.m_sw_texture[i]->m_buff;
}
Expand Down
4 changes: 2 additions & 2 deletions pcsx2/GS/Renderers/SW/GSRendererSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)

GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap);

GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage;

GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA);

Expand Down Expand Up @@ -1160,7 +1160,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
return false;
}

GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage;
GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage;

data->SetSource(t, r, i);
}
Expand Down

0 comments on commit f3a75f5

Please sign in to comment.