Skip to content

Commit

Permalink
GS/HW: Don't align dirty rectangles to block sizes when updating
Browse files Browse the repository at this point in the history
Don't align the area we write to the target to the block size. If the format matches, the writes don't need
to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture
operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it
uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process
the move in hardware, local memory never gets updated, and thus is stale.
  • Loading branch information
stenzek committed Oct 17, 2023
1 parent b90c2ec commit 3e8e1ed
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 29 deletions.
2 changes: 1 addition & 1 deletion pcsx2-gsrunner/test_check_dumps.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def compare_frames(path1, path2):
def extract_stats(file):
stats = {}
try:
with open(file, "r") as f:
with open(file, "r", errors="ignore") as f:
for line in f.readlines():
m = re.match(".*@HWSTAT@ ([^:]+): (.*) \(avg ([^)]+)\)$", line)
if m is None:
Expand Down
20 changes: 10 additions & 10 deletions pcsx2/GS/Renderers/Common/GSDirtyRect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ GSDirtyRect::GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req
{
}

GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const
GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0, bool align) const
{
GSVector4i _r;

Expand All @@ -48,14 +48,13 @@ GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const
_r.top = (r.top * dst.y) / src.y;
_r.right = (r.right * dst.x) / src.x;
_r.bottom = (r.bottom * dst.y) / src.y;
_r = _r.ralign<Align_Outside>(src);
}
else
{
_r = r.ralign<Align_Outside>(src);
_r = r;
}

return _r;
return align ? _r.ralign<Align_Outside>(src) : _r;
}

GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const
Expand All @@ -66,7 +65,7 @@ GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size

for (auto& dirty_rect : *this)
{
r = r.runion(dirty_rect.GetDirtyRect(TEX0));
r = r.runion(dirty_rect.GetDirtyRect(TEX0, true));
}

const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
Expand All @@ -92,12 +91,13 @@ u32 GSDirtyRectList::GetDirtyChannels()
return channels;
}

GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const
GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const
{
const GSVector4i r = (*this)[index].GetDirtyRect(TEX0);
GSVector4i r = (*this)[index].GetDirtyRect(TEX0, align);
const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
if (align)
r = r.ralign<Align_Outside>(bs);

GSVector2i bs = GSLocalMemory::m_psm[TEX0.PSM].bs;

return r.ralign<Align_Outside>(bs).rintersect(clamp);
return r.rintersect(clamp);
}

4 changes: 2 additions & 2 deletions pcsx2/GS/Renderers/Common/GSDirtyRect.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class GSDirtyRect

GSDirtyRect();
GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req_linear);
GSVector4i GetDirtyRect(GIFRegTEX0 TEX0) const;
GSVector4i GetDirtyRect(GIFRegTEX0 TEX0, bool align) const;
};

class GSDirtyRectList : public std::vector<GSDirtyRect>
Expand All @@ -49,5 +49,5 @@ class GSDirtyRectList : public std::vector<GSDirtyRect>
GSDirtyRectList() {}
GSVector4i GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const;
u32 GetDirtyChannels();
GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const;
GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const;
};
4 changes: 2 additions & 2 deletions pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5497,7 +5497,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
bool is_dirty = false;
for (const GSDirtyRect& rc : tgt->m_dirty)
{
if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(r).rempty())
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(r).rempty())
{
is_dirty = true;
break;
Expand Down Expand Up @@ -5597,7 +5597,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage);
for (GSDirtyRect& rc : src_target->m_dirty)
{
if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(tr).rempty())
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty())
return true;
}
}
Expand Down
35 changes: 21 additions & 14 deletions pcsx2/GS/Renderers/HW/GSTextureCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{
for (auto& dirty : t->m_dirty)
{
GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0);
GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0, true);
if (!dirty_rect.rintersect(new_rect).rempty())
{
rect_clean = false;
Expand Down Expand Up @@ -2000,7 +2000,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe

// Don't bother copying the old target in if the whole thing is dirty.
if (dst->m_dirty.empty() || (~dst->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(TEX0.PSM)) != 0 ||
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect()).eq(dst->GetUnscaledRect()))
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect(), false).eq(dst->GetUnscaledRect()))
{
// If the old target was cleared, simply propagate that through.
if (dst_match->m_texture->GetState() == GSTexture::State::Cleared)
Expand Down Expand Up @@ -2508,7 +2508,7 @@ bool GSTextureCache::CopyRGBFromDepthToColor(Target* dst, Target* depth_src)
for (u32 i = 0; i < dst->m_dirty.size(); i++)
{
GSDirtyRect& dr = dst->m_dirty[i];
const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0);
const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0, false);
if (!drc.rintersect(clear_dirty_rc).rempty())
{
if ((dr.rgba._u32 &= ~0x7) == 0)
Expand Down Expand Up @@ -4785,7 +4785,7 @@ GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVec
bool is_dirty = false;
for (GSDirtyRect& dirty : t->m_dirty)
{
if (!dirty.GetDirtyRect(t->m_TEX0).rintersect(clut_rc).rempty())
if (!dirty.GetDirtyRect(t->m_TEX0, false).rintersect(clut_rc).rempty())
{
GL_INS("Dirty rectangle overlaps CLUT rectangle, skipping");
is_dirty = true;
Expand Down Expand Up @@ -5414,34 +5414,41 @@ void GSTextureCache::Target::Update()
const GSOffset off(g_gs_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM));
for (size_t i = 0; i < m_dirty.size(); i++)
{
const GSVector4i r(m_dirty.GetDirtyRect(i, m_TEX0, total_rect));
if (r.rempty())
// Don't align the area we write to the target to the block size. If the format matches, the writes don't need
// to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture
// operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it
// uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process
// the move in hardware, local memory never gets updated, and thus is stale.
const GSVector4i update_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, false);
if (update_r.rempty())
continue;

const GSVector4i t_r(r - t_offset);
const GSVector4i read_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, true);
const GSVector4i t_r(read_r - t_offset);
if (mapped)
{
g_gs_renderer->m_mem.ReadTexture(
off, r, m.bits + t_r.y * static_cast<u32>(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA);
off, read_r, m.bits + t_r.y * static_cast<u32>(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA);
}
else
{
const int pitch = VectorAlign(r.width() * sizeof(u32));
g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA);
const int pitch = VectorAlign(read_r.width() * sizeof(u32));
g_gs_renderer->m_mem.ReadTexture(off, read_r, s_unswizzle_buffer, pitch, TEXA);

t->Update(t_r, s_unswizzle_buffer, pitch);
}

GSDevice::MultiStretchRect& drect = drects[ndrects++];
drect.src = t;
drect.src_rect = GSVector4(r - t_offset) / t_sizef;
drect.dst_rect = GSVector4(r) * GSVector4(m_scale);
drect.src_rect = GSVector4(update_r - t_offset) / t_sizef;
drect.dst_rect = GSVector4(update_r) * GSVector4(m_scale);
drect.linear = linear && (m_dirty[i].req_linear || override_linear);

// Copy the new GS memory content into the destination texture.
if (m_type == RenderTarget)
{
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w);
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW,
update_r.x, update_r.y, update_r.z, update_r.w);
drect.wmask = static_cast<u8>(m_dirty[i].rgba._u32);
}
else if (m_type == DepthStencil)
Expand Down Expand Up @@ -5498,7 +5505,7 @@ void GSTextureCache::Target::UpdateIfDirtyIntersects(const GSVector4i& rc)

for (auto& dirty : m_dirty)
{
const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0));
const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0, false));
if (dirty_rc.rintersect(rc).rempty())
continue;

Expand Down

0 comments on commit 3e8e1ed

Please sign in to comment.