diff --git a/pcsx2/GS/GSRegs.h b/pcsx2/GS/GSRegs.h
index 352ed39ba008f..b429f9c268d5b 100644
--- a/pcsx2/GS/GSRegs.h
+++ b/pcsx2/GS/GSRegs.h
@@ -85,8 +85,17 @@ enum GIF_REG
enum GIF_REG_COMPLEX
{
- GIF_REG_STQRGBAXYZF2 = 0x00,
- GIF_REG_STQRGBAXYZ2 = 0x01,
+ GIF_REG_RGBAXYZF2 = 0x00, // each value is a slot in m_fpGIFPackedRegHandlersC (filled in GSState::UpdateVertexKick)
+ GIF_REG_STQXYZF2 = 0x01,
+ GIF_REG_UVXYZF2 = 0x02,
+ GIF_REG_RGBAXYZ2 = 0x03,
+ GIF_REG_STQXYZ2 = 0x04,
+ GIF_REG_UVXYZ2 = 0x05,
+ GIF_REG_STQRGBAXYZF2 = 0x06, // renumbered: was 0x00 before this change
+ GIF_REG_UVRGBAXYZF2 = 0x07,
+ GIF_REG_STQRGBAXYZ2 = 0x08, // renumbered: was 0x01 before this change
+ GIF_REG_UVRGBAXYZ2 = 0x09,
+ GIF_REG_RGBAUVXYZF2 = 0x0A, // PATH_TYPE lists the same formats in the same order; GSState::Transfer indexes the handler table with path.type
};
enum GIF_A_D_REG
@@ -1112,6 +1121,23 @@ REG128_SET(GIFPackedReg)
GIFPackedNOP NOP;
REG_SET_END
+enum PATH_TYPE // Layout detected for a packed GIF path; stored in GIFPath::type and used by GSState::Transfer to pick a handler
+{
+ TYPE_RGBAXYZF2 = GIF_REG_RGBAXYZF2, // format entries are pinned to GIF_REG_COMPLEX because Transfer uses path.type as the m_fpGIFPackedRegHandlersC index
+ TYPE_STQXYZF2 = GIF_REG_STQXYZF2,
+ TYPE_UVXYZF2 = GIF_REG_UVXYZF2, // SetTag computes TYPE_RGBAXYZF2 + n, so these three must stay consecutive
+ TYPE_RGBAXYZ2 = GIF_REG_RGBAXYZ2,
+ TYPE_STQXYZ2 = GIF_REG_STQXYZ2,
+ TYPE_UVXYZ2 = GIF_REG_UVXYZ2, // likewise consecutive after TYPE_RGBAXYZ2
+ TYPE_STQRGBAXYZF2 = GIF_REG_STQRGBAXYZF2,
+ TYPE_UVRGBAXYZF2 = GIF_REG_UVRGBAXYZF2, // must directly follow TYPE_STQRGBAXYZF2 (SetTag adds 0 or 1)
+ TYPE_STQRGBAXYZ2 = GIF_REG_STQRGBAXYZ2,
+ TYPE_UVRGBAXYZ2 = GIF_REG_UVRGBAXYZ2, // must directly follow TYPE_STQRGBAXYZ2 (SetTag adds 0 or 1)
+ TYPE_RGBAUVXYZF2 = GIF_REG_RGBAUVXYZF2,
+ TYPE_ADONLY, // not a handler-table index: Transfer handles it with its own case
+ TYPE_UNKNOWN, // not a handler-table index: Transfer handles it with its own case
+};
+
struct alignas(32) GIFPath
{
GIFTag tag;
@@ -1121,34 +1147,24 @@ struct alignas(32) GIFPath
u32 type;
GSVector4i regs;
- enum
- {
- TYPE_UNKNOWN,
- TYPE_ADONLY,
- TYPE_STQRGBAXYZF2,
- TYPE_STQRGBAXYZ2
- };
-
__forceinline void SetTag(const void* mem)
{
const GIFTag* RESTRICT src = (const GIFTag*)mem;
// the compiler has a hard time not reloading every time a field of src is accessed
- u32 a = src->U32[0];
- u32 b = src->U32[1];
+ const u64 a = src->U64[0]; // one 64-bit read replaces the two 32-bit reads; nloop and nreg are both extracted from it below
- tag.U32[0] = a;
- tag.U32[1] = b;
+ tag.U64[0] = a; // same bytes the two removed U32 stores wrote
nloop = a & 0x7fff;
if (nloop == 0)
return;
- GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though
+ const GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though
- nreg = (b & 0xf0000000) ? (b >> 28) : 16; // src->NREG
+ nreg = (a & 0xf000000000000000ULL) ? (a >> 60) : 16; // src->NREG: top four bits of the 64-bit word (was the top four bits of b); a zero field yields 16
regs = v.upl8(v >> 4) & GSVector4i::x0f(nreg);
reg = 0;
@@ -1156,32 +1172,77 @@ struct alignas(32) GIFPath
if (tag.FLG == GIF_FLG_PACKED)
{
- if (regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1)
+ if (regs.eq8(GSVector4i::cxpr(0x0e0e0e0e)).mask() == (1 << nreg) - 1) // presumably true when all nreg descriptors are 0x0e - confirm against eq8/mask
{
type = TYPE_ADONLY;
}
- else
+ else if(nloop > 1) // layouts are only matched when the tag loops more than once; NOTE(review): type is not assigned in this hunk when nloop <= 1, presumably defaulted outside the visible lines - confirm
{
+ const u64 val = regs.U64[0]; // low 64 bits of regs; the constants below imply one descriptor per byte, first descriptor in the lowest byte
+
switch (nreg)
{
case 1:
break;
case 2:
+ if ((val >> 8) == 0x04) // second descriptor is 0x04
+ {
+ type = (val >= 0x401 && val <= 0x403) ? (TYPE_RGBAXYZF2 + (val & 0xf) - 1) : type; // 0x0401/0x0402/0x0403 -> TYPE_RGBAXYZF2/TYPE_STQXYZF2/TYPE_UVXYZF2 (relies on consecutive enumerators)
+ }
+ else if ((val >> 8) == 0x05) // second descriptor is 0x05
+ {
+ type = (val >= 0x501 && val <= 0x503) ? (TYPE_RGBAXYZ2 + (val & 0xf) - 1) : type; // 0x0501/0x0502/0x0503 -> TYPE_RGBAXYZ2/TYPE_STQXYZ2/TYPE_UVXYZ2
+ }
break;
case 3:
- // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f)
- if (regs.U32[0] == 0x00040102)
- type = TYPE_STQRGBAXYZF2;
- // GoW (has other crazy formats, like ...030503050103)
- if (regs.U32[0] == 0x00050102)
- type = TYPE_STQRGBAXYZ2;
- // TODO: common types with UV instead
+ if ((val >> 16) == 0x04) // third descriptor is 0x04
+ {
+ if ((val & 0xFF00) == 0x0100) // middle descriptor is 0x01
+ type = (val >= 0x040102 && val <= 0x040103) ? (TYPE_STQRGBAXYZF2 + (val & 0xf) - 2) : type; // 0x040102 -> TYPE_STQRGBAXYZF2, 0x040103 -> TYPE_UVRGBAXYZF2
+ else if (val == 0x00040301)
+ type = TYPE_RGBAUVXYZF2;
+
+ }
+ else if ((val >> 16) == 0x05) // third descriptor is 0x05
+ {
+ type = (val >= 0x050102 && val <= 0x050103) ? (TYPE_STQRGBAXYZ2 + (val & 0xf) - 2) : type; // 0x050102 -> TYPE_STQRGBAXYZ2, 0x050103 -> TYPE_UVRGBAXYZ2
+ }
break;
case 4:
+ if (val == 0x04030403) // the two-descriptor pattern 0x0403 repeated twice
+ {
+ type = TYPE_UVXYZF2;
+ nreg = 2; // treat the tag as the two-register form...
+ nloop *= 2; // ...looping twice as often, so the total register count is unchanged
+ }
+ else if (val == 0x05030503) // the two-descriptor pattern 0x0503 repeated twice
+ {
+ type = TYPE_UVXYZ2;
+ nreg = 2;
+ nloop *= 2;
+ }
break;
case 5:
break;
case 6:
+ if (val == 0x0000040103040103ULL) // the three-descriptor pattern 0x040103 repeated twice
+ {
+ type = TYPE_UVRGBAXYZF2;
+ nreg = 3; // folded to the three-register form, loop count doubled to compensate
+ nloop *= 2;
+ }
+ else if (val == 0x0000040301040301ULL) // 0x040301 repeated twice
+ {
+ type = TYPE_RGBAUVXYZF2;
+ nreg = 3;
+ nloop *= 2;
+ }
+ else if (val == 0x0000050103050103ULL) // 0x050103 repeated twice
+ {
+ type = TYPE_UVRGBAXYZ2;
+ nreg = 3;
+ nloop *= 2;
+ }
break;
case 7:
break;
@@ -1189,7 +1250,7 @@ struct alignas(32) GIFPath
break;
case 9:
// ffx
- if (regs.U32[0] == 0x02040102 && regs.U32[1] == 0x01020401 && regs.U32[2] == 0x00000004)
+ if (val == 0x0102040102040102ULL && regs.U32[2] == 0x00000004) // the removed U32[0]/U32[1] checks folded into one 64-bit compare
{
type = TYPE_STQRGBAXYZF2;
nreg = 3;
@@ -1202,7 +1263,7 @@ struct alignas(32) GIFPath
break;
case 12:
// dq8 (not many, mostly 040102)
- if (regs.U32[0] == 0x02040102 && regs.U32[1] == 0x01020401 && regs.U32[2] == 0x04010204)
+ if (val == 0x0102040102040102ULL && regs.U32[2] == 0x04010204) // the removed U32[0]/U32[1] checks folded into one 64-bit compare
{
type = TYPE_STQRGBAXYZF2;
nreg = 3;
diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp
index 6cacd46779277..ae8839d2cb867 100644
--- a/pcsx2/GS/GSState.cpp
+++ b/pcsx2/GS/GSState.cpp
@@ -210,7 +210,16 @@ void GSState::SetPrimHandlers()
m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2
; \
m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2
; \
m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2
; \
- m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2
;
+ m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2
; \
+ m_fpGIFPackedRegHandlerUVRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerUVRGBAXYZ2
; \
+ m_fpGIFPackedRegHandlerRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerRGBAXYZ2
; \
+ m_fpGIFPackedRegHandlerRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerRGBAXYZF2
; \
+ m_fpGIFPackedRegHandlerSTQXYZ2[P] = &GSState::GIFPackedRegHandlerSTQXYZ2
; \
+ m_fpGIFPackedRegHandlerSTQXYZF2[P] = &GSState::GIFPackedRegHandlerSTQXYZF2
; \
+ m_fpGIFPackedRegHandlerUVXYZ2[P] = &GSState::GIFPackedRegHandlerUVXYZ2
; \
+ m_fpGIFPackedRegHandlerUVXYZF2[P] = &GSState::GIFPackedRegHandlerUVXYZF2
; \
+ m_fpGIFPackedRegHandlerRGBAUVXYZF2[P] = &GSState::GIFPackedRegHandlerRGBAUVXYZF2
; \
+ m_fpGIFPackedRegHandlerUVRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerUVRGBAXYZF2
;
SetHandlerXYZ(GS_POINTLIST, true, false);
SetHandlerXYZ(GS_LINELIST, auto_flush, index_swap);
@@ -595,33 +604,37 @@ void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r)
template
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
{
- if (!adc || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET)))
+ const bool skip = adc || r->XYZF2.Skip(); // computed once; used for the flush test and passed to VertexKick
+
+ if (!skip || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) // behaviour change: the removed test used !adc, so a register with its own Skip flag set no longer forces CheckFlushes by itself
CheckFlushes();
- GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
- GSVector4i zf = GSVector4i::loadl(&r->U64[1]);
+ GSVector4i xy = GSVector4i::loadnt(r); // one whole-register load replaces the two loadl calls; NOTE(review): presumably requires r to be 16-byte aligned, which loadl did not - confirm
+ GSVector4i zf = xy.zwzw(); // presumably selects the half that the removed code loaded from U64[1] - confirm
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
m_v.m[1] = xy.upl32(zf);
- VertexKick(adc ? 1 : r->XYZF2.Skip());
+ VertexKick(skip); // same value the removed expression produced (adc ? 1 : Skip())
}
template
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
{
- if (!adc || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET)))
+ const bool skip = adc || r->XYZ2.Skip(); // computed once; used for the flush test and passed to VertexKick
+
+ if (!skip || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) // behaviour change: the removed test used !adc, so a register with its own Skip flag set no longer forces CheckFlushes by itself
CheckFlushes();
- const GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
- const GSVector4i z = GSVector4i::loadl(&r->U64[1]);
+ const GSVector4i xy = GSVector4i::loadnt(r); // one whole-register load replaces the two loadl calls; NOTE(review): presumably requires r to be 16-byte aligned, which loadl did not - confirm
+ const GSVector4i z = xy.zzzz(); // presumably broadcasts the lane the removed code read via U64[1] - confirm
const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV));
- VertexKick(adc ? 1 : r->XYZ2.Skip());
+ VertexKick(skip); // same value the removed expression produced (adc ? 1 : Skip())
}
void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
@@ -643,40 +656,64 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3
{
ASSERT(size > 0 && size % 3 == 0);
- bool flushes_checked = false;
+ CheckFlushes(); // now unconditional, once per call; the removed code flushed only on a prim-class/XYOFFSET change or at the first non-skipped vertex
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); // masks are built once here instead of inside the loop
+ const GSVector4i ffffff_mask = GSVector4i::x00ffffff().upl32(ff_mask);
- if (GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET)))
+ while (r < r_end) // one iteration per (r[0], r[1], r[2]) triple
{
- flushes_checked = true;
- CheckFlushes();
+ const GSVector4i st = GSVector4i::loadnt(&r[0]); // whole first register in one load; NOTE(review): loadnt presumably needs 16-byte alignment, the replaced loadl did not - confirm
+ GSVector4i q = st.zzzz(); // presumably broadcasts the lane the removed code loaded from r[0].U64[1] - confirm
+ const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16(); // second register, each 32-bit lane masked to its low byte before packing
+
+ q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
+
+ m_v.m[0] = st.upl64(rgba.upl32(q));
+
+ GSVector4i xy = GSVector4i::loadnt(&r[2]); // third register, same load pattern as the single-register XYZF2 handler
+ GSVector4i zf = xy.zwzw();
+ xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
+ zf = zf.srl32(4) & ffffff_mask;
+
+ m_v.m[1] = xy.upl32(zf);
+
+ VertexKick(r[2].XYZF2.Skip()); // skip flag comes from the third register of the triple
+
+ r += 3; // three packed registers consumed per vertex
}
+ m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
+}
+
+template
+void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] STQ, r[1] RGBA, r[2] XYZ2) triples; one VertexKick per triple
+{
+ ASSERT(size > 0 && size % 3 == 0); // size counts packed registers and must cover whole triples
+
+ CheckFlushes(); // unconditional, once per call; the removed code deferred it until the first non-skipped vertex
+
const GIFPackedReg* RESTRICT r_end = r + size;
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); // built once instead of calling x000000ff() per iteration
while (r < r_end)
{
- const GSVector4i st = GSVector4i::loadl(&r[0].U64[0]);
- GSVector4i q = GSVector4i::loadl(&r[0].U64[1]);
- const GSVector4i rgba = (GSVector4i::load(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
+ const GSVector4i st = GSVector4i::loadnt(&r[0]); // whole first register in one load; NOTE(review): loadnt presumably needs 16-byte alignment, the replaced loadl did not - confirm
+ GSVector4i q = st.zzzz(); // presumably broadcasts the lane the removed code loaded from r[0].U64[1] - confirm
+ const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16();
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
- m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
+ m_v.m[0] = st.upl64(rgba.upl32(q));
- GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]);
- GSVector4i zf = GSVector4i::loadl(&r[2].U64[1]);
- xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
- zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
+ const GSVector4i xy = GSVector4i::loadnt(&r[2]); // third register, same load pattern as the single-register XYZ2 handler
+ const GSVector4i z = xy.zzzz();
+ const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
- m_v.m[1] = xy.upl32(zf); // TODO: only store the last one
+ m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // upper half is filled from the current m_v.UV, since this format carries no UV register
- const bool skip = r[2].XYZF2.Skip();
- if (!flushes_checked && !skip)
- {
- flushes_checked = true;
- CheckFlushes();
- }
- VertexKick(skip);
+ VertexKick(r[2].XYZ2.Skip()); // skip flag comes from the third register of the triple
r += 3;
}
@@ -685,47 +722,286 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3
}
template
-void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size)
+void GSState::GIFPackedRegHandlerUVRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] UV, r[1] RGBA, r[2] XYZ2) triples; one VertexKick per triple
{
ASSERT(size > 0 && size % 3 == 0);
- bool flushes_checked = false;
- if (GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET)))
+ CheckFlushes(); // unconditional, once per call
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+
+ const GSVector4i st = GSVector4i::loadl(&m_v.ST); // the format carries no ST, so the current m_v.ST is reused for every vertex
+ const GSVector4i q = GSVector4i::loadl(&m_v.RGBAQ.U32[1]).blend8(GSVector4i::cast(GSVector4::m_one), GSVector4i::loadl(&m_v.RGBAQ.U32[1]) == GSVector4i::zero()); // NOTE(review): applies the zero -> one substitution, while the other RGBA handlers without STQ use GSVector4i(m_v.RGBAQ.U32[1]) unmodified - confirm which is intended
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff);
+ constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff);
+
+ while (r < r_end)
{
- flushes_checked = true;
- CheckFlushes();
+ const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16(); // second register, each 32-bit lane masked to its low byte before packing
+
+ m_v.m[0] = st.upl64(rgba.upl32(q));
+
+ const GSVector4i xy = GSVector4i::loadnt(&r[2]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ const GSVector4i z = xy.zzzz();
+ const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
+
+ const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; // first register, each 32-bit lane masked to 14 bits
+
+ m_v.m[1] = xyz.upl64(uv.ps32(uv)); // NOTE(review): the non-UV XYZ2 handlers fill this half with loadl(&m_v.UV); confirm that whatever follows UV in m_v is meant to be overwritten by the packed value here
+
+ VertexKick(r[2].XYZ2.Skip()); // skip flag comes from the third register of the triple
+
+ r += 3; // three packed registers consumed per vertex
}
+}
+
+template
+void GSState::GIFPackedRegHandlerRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] RGBA, r[1] XYZ2) pairs; one VertexKick per pair
+{
+ ASSERT(size > 0 && size % 2 == 0); // size counts packed registers and must cover whole pairs
+
+ CheckFlushes(); // unconditional, once per call
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+
+ const GSVector4i st = GSVector4i::loadl(&m_v.ST); // the format carries no ST, so the current m_v.ST is reused for every vertex
+ const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); // Q is taken from the current vertex state; NOTE(review): confirm this matches what the single-register RGBA handler (not visible here) would store
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff);
+
+ while (r < r_end)
+ {
+ const GSVector4i rgba = (GSVector4i::load(&r[0]) & ff_mask).ps32().pu16(); // first register, each 32-bit lane masked to its low byte before packing
+
+ m_v.m[0] = st.upl64(rgba.upl32(q));
+
+ const GSVector4i xy = GSVector4i::loadnt(&r[1]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ const GSVector4i z = xy.zzzz();
+ const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
+
+ m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // upper half is filled from the current m_v.UV, since this format carries no UV register
+
+ VertexKick(r[1].XYZ2.Skip()); // skip flag comes from the second register of the pair
+
+ r += 2; // two packed registers consumed per vertex
+ }
+}
+
+template
+void GSState::GIFPackedRegHandlerRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] RGBA, r[1] XYZF2) pairs; one VertexKick per pair
+{
+ ASSERT(size > 0 && size % 2 == 0); // size counts packed registers and must cover whole pairs
+
+ CheckFlushes(); // unconditional, once per call
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+
+ const GSVector4i st = GSVector4i::loadl(&m_v.ST); // the format carries no ST, so the current m_v.ST is reused for every vertex
+ const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); // Q is taken from the current vertex state; NOTE(review): confirm this matches what the single-register RGBA handler (not visible here) would store
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff);
+ const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask); // the STQRGBAXYZF2 handler builds the same mask via x00ffffff()
+
+ while (r < r_end)
+ {
+ const GSVector4i rgba = (GSVector4i::load(&r[0]) & ff_mask).ps32().pu16(); // first register, each 32-bit lane masked to its low byte before packing
+
+ m_v.m[0] = st.upl64(rgba.upl32(q));
+
+ GSVector4i xy = GSVector4i::loadnt(&r[1]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ GSVector4i zf = xy.zwzw();
+ xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); // UV comes from the current m_v.UV, since this format carries no UV register
+ zf = zf.srl32(4) & ffffff_mask;
+
+ m_v.m[1] = xy.upl32(zf);
+
+ VertexKick(r[1].XYZF2.Skip()); // skip flag comes from the second register of the pair
+
+ r += 2; // two packed registers consumed per vertex
+ }
+}
+
+template
+void GSState::GIFPackedRegHandlerUVRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] UV, r[1] RGBA, r[2] XYZF2) triples; one VertexKick per triple
+{
+ ASSERT(size > 0 && size % 3 == 0); // size counts packed registers and must cover whole triples
+
+ CheckFlushes(); // unconditional, once per call
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+
+ const GSVector4i st = GSVector4i::loadl(&m_v.ST); // the format carries no ST, so the current m_v.ST is reused for every vertex
+ const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); // NOTE(review): no zero -> one substitution here, unlike GIFPackedRegHandlerUVRGBAXYZ2 - confirm which is intended
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff);
+ constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff);
+ const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask);
+
+ while (r < r_end)
+ {
+ const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16(); // second register, each 32-bit lane masked to its low byte before packing
+
+ m_v.m[0] = st.upl64(rgba.upl32(q));
+
+ GSVector4i xy = GSVector4i::loadnt(&r[2]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ GSVector4i zf = xy.zwzw();
+
+ const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; // first register, each 32-bit lane masked to 14 bits
+
+ xy = xy.upl16(xy.srl<4>()).upl32(uv.ps32(uv)); // the packed UV takes the place of the m_v.UV load used by the non-UV XYZF2 handlers
+ zf = zf.srl32(4) & ffffff_mask;
+
+ m_v.m[1] = xy.upl32(zf);
+
+ VertexKick(r[2].XYZF2.Skip()); // skip flag comes from the third register of the triple
+
+ r += 3; // three packed registers consumed per vertex
+ }
+}
+
+template
+void GSState::GIFPackedRegHandlerRGBAUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] RGBA, r[1] UV, r[2] XYZF2) triples; same as UVRGBAXYZF2 with the first two registers swapped
+{
+ ASSERT(size > 0 && size % 3 == 0); // size counts packed registers and must cover whole triples
+
+ CheckFlushes(); // unconditional, once per call
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+
+ const GSVector4i st = GSVector4i::loadl(&m_v.ST); // the format carries no ST, so the current m_v.ST is reused for every vertex
+ const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); // Q is taken from the current vertex state, unmodified
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff);
+ constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff);
+ const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask);
+
+ while (r < r_end)
+ {
+ const GSVector4i rgba = (GSVector4i::load(&r[0]) & ff_mask).ps32().pu16(); // first register, each 32-bit lane masked to its low byte before packing
+
+ m_v.m[0] = st.upl64(rgba.upl32(q));
+
+ GSVector4i xy = GSVector4i::loadnt(&r[2]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ GSVector4i zf = xy.zwzw();
+
+ const GSVector4i uv = GSVector4i::loadl(&r[1]) & mask_3fff; // second register, each 32-bit lane masked to 14 bits
+
+ xy = xy.upl16(xy.srl<4>()).upl32(uv.ps32(uv)); // the packed UV takes the place of the m_v.UV load used by the non-UV XYZF2 handlers
+ zf = zf.srl32(4) & ffffff_mask;
+
+ m_v.m[1] = xy.upl32(zf);
+
+ VertexKick(r[2].XYZF2.Skip()); // skip flag comes from the third register of the triple
+
+ r += 3; // three packed registers consumed per vertex
+ }
+}
+
+template
+void GSState::GIFPackedRegHandlerSTQXYZ2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] STQ, r[1] XYZ2) pairs; one VertexKick per pair
+{
+ ASSERT(size > 0 && size % 2 == 0); // size counts packed registers and must cover whole pairs
+
+ CheckFlushes(); // unconditional, once per call
const GIFPackedReg* RESTRICT r_end = r + size;
while (r < r_end)
{
const GSVector4i st = GSVector4i::loadl(&r[0].U64[0]);
- GSVector4i q = GSVector4i::loadl(&r[0].U64[1]);
- const GSVector4i rgba = (GSVector4i::load(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
+ m_v.ST.U64 = st.U64[0]; // only ST is written to the vertex; m_v.RGBAQ is not touched by this handler
- q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
+ const GSVector4i xy = GSVector4i::loadnt(&r[1]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ const GSVector4i z = xy.zzzz();
+ const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
+
+ m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // upper half is filled from the current m_v.UV, since this format carries no UV register
+
+ VertexKick(r[1].XYZ2.Skip()); // skip flag comes from the second register of the pair
+
+ r += 2; // two packed registers consumed per vertex
+ }
+
+ m_q = r[-2].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
+}
+
+template
+void GSState::GIFPackedRegHandlerSTQXYZF2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] STQ, r[1] XYZF2) pairs; one VertexKick per pair
+{
+ ASSERT(size > 0 && size % 2 == 0); // size counts packed registers and must cover whole pairs
+
+ CheckFlushes(); // unconditional, once per call
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+ constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff);
+ const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask);
- m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
+ while (r < r_end)
+ {
+ const GSVector4i st = GSVector4i::loadl(&r[0].U64[0]);
+ m_v.ST.U64 = st.U64[0]; // only ST is written to the vertex; m_v.RGBAQ is not touched by this handler
+
+ GSVector4i xy = GSVector4i::loadnt(&r[1]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ GSVector4i zf = xy.zwzw();
+ xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); // UV comes from the current m_v.UV, since this format carries no UV register
+ zf = zf.srl32(4) & ffffff_mask;
+
+ m_v.m[1] = xy.upl32(zf);
+
+ VertexKick(r[1].XYZF2.Skip()); // skip flag comes from the second register of the pair
+
+ r += 2; // two packed registers consumed per vertex
+ }
+
+ m_q = r[-2].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
+}
+
+template
+void GSState::GIFPackedRegHandlerUVXYZ2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] UV, r[1] XYZ2) pairs; writes only m_v.m[1], one VertexKick per pair
+{
+ ASSERT(size > 0 && size % 2 == 0); // size counts packed registers and must cover whole pairs
+
+ CheckFlushes(); // unconditional, once per call
- const GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]);
- const GSVector4i z = GSVector4i::loadl(&r[2].U64[1]);
+ const GIFPackedReg* RESTRICT r_end = r + size;
+ constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff);
+
+ while (r < r_end)
+ {
+ GSVector4i xy = GSVector4i::loadnt(&r[1]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ GSVector4i z = xy.zzzz(); // xy and z are never reassigned; the sibling XYZ2 handlers declare them const
const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
+ const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; // first register, each 32-bit lane masked to 14 bits
- m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one
+ m_v.m[1] = xyz.upl64(uv.ps32(uv)); // NOTE(review): the removed line filled this half with loadl(&m_v.UV); confirm that whatever follows UV in m_v is meant to be overwritten by the packed value here
- const bool skip = r[2].XYZF2.Skip();
- if (!flushes_checked && !skip)
- {
- flushes_checked = true;
- CheckFlushes();
- }
- VertexKick(skip);
+ VertexKick(r[1].XYZ2.Skip()); // skip flag comes from the second register of the pair
- r += 3;
+ r += 2; // two packed registers consumed per vertex
}
+}
- m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
+template
+void GSState::GIFPackedRegHandlerUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size) // Bulk handler for runs of (r[0] UV, r[1] XYZF2) pairs; writes only m_v.m[1], one VertexKick per pair
+{
+ ASSERT(size > 0 && size % 2 == 0); // size counts packed registers and must cover whole pairs
+
+ CheckFlushes(); // unconditional, once per call
+
+ const GIFPackedReg* RESTRICT r_end = r + size;
+ constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff);
+ const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(GSVector4i::cxpr(0x000000ff));
+
+ while (r < r_end)
+ {
+ GSVector4i xy = GSVector4i::loadnt(&r[1]); // NOTE(review): loadnt presumably needs 16-byte alignment - confirm r satisfies it
+ GSVector4i zf = xy.zwzw();
+ const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; // first register, each 32-bit lane masked to 14 bits
+ xy = xy.upl16(xy.srl<4>()).upl32(uv.ps32(uv)); // the packed UV takes the place of the m_v.UV load used by the non-UV XYZF2 handlers
+ zf = zf.srl32(4) & ffffff_mask;
+
+ m_v.m[1] = xy.upl32(zf);
+
+ VertexKick(r[1].XYZF2.Skip()); // skip flag comes from the second register of the pair
+
+ r += 2; // two packed registers consumed per vertex
+ }
}
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size)
@@ -2328,6 +2604,8 @@ void GSState::Transfer(const u8* mem, u32 size)
if (path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED)
ApplyPRIM(path.tag.PRIM);
}
+
+ continue;
}
else
{
@@ -2358,7 +2636,7 @@ void GSState::Transfer(const u8* mem, u32 size)
switch (path.type)
{
- case GIFPath::TYPE_UNKNOWN:
+ case PATH_TYPE::TYPE_UNKNOWN: // enumerators now come from the namespace-scope PATH_TYPE enum instead of the one nested in GIFPath
{
u32 reg = 0;
@@ -2372,29 +2650,19 @@ void GSState::Transfer(const u8* mem, u32 size)
} while (--total > 0);
}
break;
- case GIFPath::TYPE_ADONLY: // very common
+ case PATH_TYPE::TYPE_ADONLY: // very common
do
{
(this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR & 0x7F])(&((GIFPackedReg*)mem)->r);
mem += sizeof(GIFPackedReg);
} while (--total > 0);
-
- break;
- case GIFPath::TYPE_STQRGBAXYZF2: // majority of the vertices are formatted like this
- (this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2])((GIFPackedReg*)mem, total);
-
- mem += total * sizeof(GIFPackedReg);
-
- break;
- case GIFPath::TYPE_STQRGBAXYZ2:
- (this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2])((GIFPackedReg*)mem, total);
+ break;
+ default: // every remaining PATH_TYPE value is treated as a bulk format
+ (this->*m_fpGIFPackedRegHandlersC[path.type])((GIFPackedReg*)mem, total); // path.type is used directly as the table index, so PATH_TYPE format values must equal the GIF_REG_COMPLEX slots; no range check now that __assume(0) is gone
mem += total * sizeof(GIFPackedReg);
-
- break;
- default:
- __assume(0);
+ break;
}
path.nloop = 0;
@@ -2744,6 +3012,15 @@ void GSState::UpdateVertexKick()
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim];
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = m_fpGIFPackedRegHandlerSTQRGBAXYZ2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_UVRGBAXYZ2] = m_fpGIFPackedRegHandlerUVRGBAXYZ2[prim]; // the nine new formats are bound the same way: the per-prim instantiation goes into its GIF_REG_COMPLEX slot
+ m_fpGIFPackedRegHandlersC[GIF_REG_RGBAXYZ2] = m_fpGIFPackedRegHandlerRGBAXYZ2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_RGBAXYZF2] = m_fpGIFPackedRegHandlerRGBAXYZF2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_STQXYZ2] = m_fpGIFPackedRegHandlerSTQXYZ2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_STQXYZF2] = m_fpGIFPackedRegHandlerSTQXYZF2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_RGBAUVXYZF2] = m_fpGIFPackedRegHandlerRGBAUVXYZF2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_UVRGBAXYZF2] = m_fpGIFPackedRegHandlerUVRGBAXYZF2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_UVXYZ2] = m_fpGIFPackedRegHandlerUVXYZ2[prim];
+ m_fpGIFPackedRegHandlersC[GIF_REG_UVXYZF2] = m_fpGIFPackedRegHandlerUVXYZF2[prim]; // with this line all eleven slots (0x00..0x0A) are assigned
}
void GSState::GrowVertexBuffer()
diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h
index 9e3fd1de29985..5766542e4c135 100644
--- a/pcsx2/GS/GSState.h
+++ b/pcsx2/GS/GSState.h
@@ -63,12 +63,31 @@ class GSState : public GSAlignedClass<32>
typedef void (GSState::*GIFPackedRegHandlerC)(const GIFPackedReg* RESTRICT r, u32 size);
- GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[2] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[11] = {}; // one slot per GIF_REG_COMPLEX entry (0x00..0x0A); must grow together with that enum
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8] = {};
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZ2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVRGBAXYZ2[8] = {}; // per-format tables: written by [P] in SetPrimHandlers, read by [prim] in UpdateVertexKick
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerRGBAXYZ2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerRGBAXYZF2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQXYZ2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQXYZF2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVXYZ2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVXYZF2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerRGBAUVXYZF2[8] = {};
+ GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVRGBAXYZF2[8] = {};
template void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
template void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerUVRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size); // bulk handlers: r points at size consecutive packed registers; each asserts size is a multiple of its register count
+ template void GIFPackedRegHandlerRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerSTQXYZ2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerSTQXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerUVXYZ2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerRGBAUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
+ template void GIFPackedRegHandlerUVRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
+
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size);
template void ApplyTEX0(GIFRegTEX0& TEX0);