Skip to content

Commit

Permalink
GS: Further transfer optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
refractionpcsx2 committed Oct 3, 2023
1 parent 2dc3bf1 commit 7b97251
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 152 deletions.
90 changes: 43 additions & 47 deletions pcsx2/GS/GSRegs.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@ enum GIF_REG

// NOTE(review): this listing is a rendered diff whose +/- markers were lost, so
// enumerators removed by the commit and enumerators added by it are interleaved.
// Eight names therefore appear twice with different values (e.g.
// GIF_REG_RGBAXYZF2 is 0x00 on one line and 0x04 on another); as written this
// is not a valid enum definition and must not be copied verbatim.
// Presumably the post-change numbering is the 0x00..0x0A run RGBAXYZF2,
// STQXYZF2, UVXYZF2, RGBAXYZ2, STQXYZ2, UVXYZ2, STQRGBAXYZF2, STQRGBAXYZ2,
// UVRGBAXYZ2, RGBAUVXYZF2, UVRGBAXYZF2 - TODO confirm against the repository.
enum GIF_REG_COMPLEX
{
GIF_REG_STQRGBAXYZF2 = 0x00,
GIF_REG_STQRGBAXYZ2 = 0x01,
GIF_REG_UVRGBAXYZ2 = 0x02,
GIF_REG_RGBAXYZF2 = 0x00,
GIF_REG_STQXYZF2 = 0x01,
GIF_REG_UVXYZF2 = 0x02,
GIF_REG_RGBAXYZ2 = 0x03,
GIF_REG_RGBAXYZF2 = 0x04,
GIF_REG_STQXYZ2 = 0x05,
GIF_REG_STQXYZF2 = 0x06,
GIF_REG_UVXYZ2 = 0x07,
GIF_REG_UVXYZF2 = 0x08,
GIF_REG_STQXYZ2 = 0x04,
GIF_REG_UVXYZ2 = 0x05,
GIF_REG_STQRGBAXYZF2 = 0x06,
GIF_REG_STQRGBAXYZ2 = 0x07,
GIF_REG_UVRGBAXYZ2 = 0x08,
GIF_REG_RGBAUVXYZF2 = 0x09,
GIF_REG_UVRGBAXYZF2 = 0x0A
};
Expand Down Expand Up @@ -1132,19 +1132,19 @@ struct alignas(32) GIFPath

// NOTE(review): rendered diff without +/- markers - the old and the new
// enumerator orders are interleaved, so seven TYPE_* names are listed twice
// (e.g. TYPE_UNKNOWN and TYPE_ADONLY appear both at the top and at the bottom).
// As written this is not a valid enum definition.
// Ordering matters: SetTag in this same listing derives a type with
// `TYPE_RGBAXYZF2 + (val & 0xf) - 1` for val 0x401..0x403 and
// `TYPE_RGBAXYZ2 + (val & 0xf) - 1` for val 0x501..0x503, so each of those two
// enumerators must be immediately followed by its two sibling formats
// (presumably STQ* then UV* - TODO confirm against the repository).
enum
{
TYPE_UNKNOWN,
TYPE_ADONLY,
TYPE_STQRGBAXYZF2,
TYPE_STQRGBAXYZ2,
TYPE_UVRGBAXYZ2,
TYPE_RGBAXYZF2,
TYPE_UVXYZF2,
TYPE_STQXYZF2,
TYPE_UVXYZF2,
TYPE_RGBAXYZ2,
TYPE_UVXYZ2,
TYPE_STQXYZ2,
TYPE_UVXYZ2,
TYPE_STQRGBAXYZF2,
TYPE_STQRGBAXYZ2,
TYPE_UVRGBAXYZ2,
TYPE_RGBAUVXYZF2,
TYPE_UVRGBAXYZF2,
TYPE_ADONLY,
TYPE_UNKNOWN,
};

__forceinline void SetTag(const void* mem)
Expand All @@ -1153,20 +1153,18 @@ struct alignas(32) GIFPath

// the compiler has a hard time not reloading every time a field of src is accessed

u32 a = src->U32[0];
u32 b = src->U32[1];
const u64 a = src->U64[0];

tag.U32[0] = a;
tag.U32[1] = b;
tag.U64[0] = a;

nloop = a & 0x7fff;

if (nloop == 0)
return;

GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though
const GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though

nreg = (b & 0xf0000000) ? (b >> 28) : 16; // src->NREG
nreg = (a & 0xf000000000000000ULL) ? (a >> 60) : 16; // src->NREG
regs = v.upl8(v >> 4) & GSVector4i::x0f(nreg);
reg = 0;

Expand All @@ -1178,49 +1176,47 @@ struct alignas(32) GIFPath
{
type = TYPE_ADONLY;
}
else
else if(nloop > 4)
{
switch (nreg)
{
case 1:
break;
case 2:
if (regs.eq8(GSVector4i::cxpr(0x00000401)).mask() == (1 << 2) - 1)
type = TYPE_RGBAXYZF2;
else if (regs.eq8(GSVector4i::cxpr(0x0000040)).mask() == (1 << 2) - 1)
type = TYPE_STQXYZF2;
else if (regs.eq8(GSVector4i::cxpr(0x00000402)).mask() == (1 << 2) - 13)
type = TYPE_UVXYZF2;
else if (regs.eq8(GSVector4i::cxpr(0x00000501)).mask() == (1 << 2) - 1)
type = TYPE_RGBAXYZ2;
else if (regs.eq8(GSVector4i::cxpr(0x00000502)).mask() == (1 << 2) - 1)
type = TYPE_STQXYZ2;
else if (regs.eq8(GSVector4i::cxpr(0x00000503)).mask() == (1 << 2) - 1)
type = TYPE_UVXYZ2;
{
const u32 val = regs.U32[0];
if ((val >> 8) == 0x04)
{
type = (val >= 0x401 && val <= 0x403) ? (TYPE_RGBAXYZF2 + (val & 0xf) - 1) : type;
}
else if ((val >> 8) == 0x05)
{
type = (val >= 0x501 && val <= 0x503) ? (TYPE_RGBAXYZ2 + (val & 0xf) - 1) : type;
}
}
break;
case 3:
// many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f)
if (regs.eq8(GSVector4i::cxpr(0x00040102)).mask() == (1 << 3) - 1)
if (regs.U32[0] == 0x00040102)
type = TYPE_STQRGBAXYZF2;
// GoW (has other crazy formats, like ...030503050103)
else if (regs.eq8(GSVector4i::cxpr(0x00050102)).mask() == (1 << 3) - 1)
else if (regs.U32[0] == 0x00050102)
type = TYPE_STQRGBAXYZ2;
// TODO: common types with UV instead
else if (regs.eq8(GSVector4i::cxpr(0x00050103)).mask() == (1 << 3) - 1)
else if (regs.U32[0] == 0x00050103)
type = TYPE_UVRGBAXYZ2;
else if (regs.eq8(GSVector4i::cxpr(0x00040103)).mask() == (1 << 3) - 1)
else if (regs.U32[0] == 0x00040103)
type = TYPE_UVRGBAXYZF2;
else if (regs.eq8(GSVector4i::cxpr(0x00040301)).mask() == (1 << 3) - 1)
else if (regs.U32[0] == 0x00040301)
type = TYPE_RGBAUVXYZF2;
break;
case 4:
if (regs.eq8(GSVector4i(0x04030403)).mask() == (1 << 4) - 1)
if (regs.U32[0] == 0x04030403)
{
type = TYPE_UVXYZF2;
nreg = 2;
nloop *= 2;
}
else if (regs.eq8(GSVector4i::cxpr(0x05030503)).mask() == (1 << 4) - 1)
else if (regs.U32[0] == 0x05030503)
{
type = TYPE_UVXYZ2;
nreg = 2;
Expand All @@ -1230,19 +1226,19 @@ struct alignas(32) GIFPath
case 5:
break;
case 6:
if (regs.U32[0] == 0x03040103 && regs.U32[1] == 0x00000401)
if (regs.U64[0] == 0x0000040103040103ULL)
{
type = TYPE_UVRGBAXYZF2;
nreg = 3;
nloop *= 2;
}
else if (regs.U32[0] == 0x01040301 && regs.U32[1] == 0x00000403)
else if (regs.U64[0] == 0x0000040301040301ULL)
{
type = TYPE_RGBAUVXYZF2;
nreg = 3;
nloop *= 2;
}
else if (regs.U32[0] == 0x03050103 && regs.U32[1] == 0x00000501)
else if (regs.U64[0] == 0x0000050103050103ULL)
{
type = TYPE_UVRGBAXYZ2;
nreg = 3;
Expand All @@ -1255,7 +1251,7 @@ struct alignas(32) GIFPath
break;
case 9:
// ffx
if (regs.U32[0] == 0x02040102 && regs.U32[1] == 0x01020401 && regs.U32[2] == 0x00000004)
if (regs.U64[0] == 0x0102040102040102ULL && regs.U32[2] == 0x00000004)
{
type = TYPE_STQRGBAXYZF2;
nreg = 3;
Expand All @@ -1268,7 +1264,7 @@ struct alignas(32) GIFPath
break;
case 12:
// dq8 (not many, mostly 040102)
if (regs.U32[0] == 0x02040102 && regs.U32[1] == 0x01020401 && regs.U32[2] == 0x04010204)
if (regs.U64[0] == 0x0102040102040102ULL && regs.U32[2] == 0x04010204)
{
type = TYPE_STQRGBAXYZF2;
nreg = 3;
Expand Down
Loading

0 comments on commit 7b97251

Please sign in to comment.