diff --git a/pcsx2/GS/GSRegs.h b/pcsx2/GS/GSRegs.h index 352ed39ba008f..b429f9c268d5b 100644 --- a/pcsx2/GS/GSRegs.h +++ b/pcsx2/GS/GSRegs.h @@ -85,8 +85,17 @@ enum GIF_REG enum GIF_REG_COMPLEX { - GIF_REG_STQRGBAXYZF2 = 0x00, - GIF_REG_STQRGBAXYZ2 = 0x01, + GIF_REG_RGBAXYZF2 = 0x00, + GIF_REG_STQXYZF2 = 0x01, + GIF_REG_UVXYZF2 = 0x02, + GIF_REG_RGBAXYZ2 = 0x03, + GIF_REG_STQXYZ2 = 0x04, + GIF_REG_UVXYZ2 = 0x05, + GIF_REG_STQRGBAXYZF2 = 0x06, + GIF_REG_UVRGBAXYZF2 = 0x07, + GIF_REG_STQRGBAXYZ2 = 0x08, + GIF_REG_UVRGBAXYZ2 = 0x09, + GIF_REG_RGBAUVXYZF2 = 0x0A, }; enum GIF_A_D_REG @@ -1112,6 +1121,23 @@ REG128_SET(GIFPackedReg) GIFPackedNOP NOP; REG_SET_END +enum PATH_TYPE +{ + TYPE_RGBAXYZF2, + TYPE_STQXYZF2, + TYPE_UVXYZF2, + TYPE_RGBAXYZ2, + TYPE_STQXYZ2, + TYPE_UVXYZ2, + TYPE_STQRGBAXYZF2, + TYPE_UVRGBAXYZF2, + TYPE_STQRGBAXYZ2, + TYPE_UVRGBAXYZ2, + TYPE_RGBAUVXYZF2, + TYPE_ADONLY, + TYPE_UNKNOWN, +}; + struct alignas(32) GIFPath { GIFTag tag; @@ -1121,34 +1147,24 @@ struct alignas(32) GIFPath u32 type; GSVector4i regs; - enum - { - TYPE_UNKNOWN, - TYPE_ADONLY, - TYPE_STQRGBAXYZF2, - TYPE_STQRGBAXYZ2 - }; - __forceinline void SetTag(const void* mem) { const GIFTag* RESTRICT src = (const GIFTag*)mem; // the compiler has a hard time not reloading every time a field of src is accessed - u32 a = src->U32[0]; - u32 b = src->U32[1]; + const u64 a = src->U64[0]; - tag.U32[0] = a; - tag.U32[1] = b; + tag.U64[0] = a; nloop = a & 0x7fff; if (nloop == 0) return; - GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though + const GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though - nreg = (b & 0xf0000000) ? (b >> 28) : 16; // src->NREG + nreg = (a & 0xf000000000000000ULL) ? (a >> 60) : 16; // src->NREG regs = v.upl8(v >> 4) & GSVector4i::x0f(nreg); reg = 0; @@ -1156,32 +1172,77 @@ struct alignas(32) GIFPath if (tag.FLG == GIF_FLG_PACKED) { - if (regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1) + if (regs.eq8(GSVector4i::cxpr(0x0e0e0e0e)).mask() == (1 << nreg) - 1) { type = TYPE_ADONLY; } - else + else if(nloop > 1) { + const u64 val = regs.U64[0]; + switch (nreg) { case 1: break; case 2: + if ((val >> 8) == 0x04) + { + type = (val >= 0x401 && val <= 0x403) ? (TYPE_RGBAXYZF2 + (val & 0xf) - 1) : type; + } + else if ((val >> 8) == 0x05) + { + type = (val >= 0x501 && val <= 0x503) ? (TYPE_RGBAXYZ2 + (val & 0xf) - 1) : type; + } break; case 3: - // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f) - if (regs.U32[0] == 0x00040102) - type = TYPE_STQRGBAXYZF2; - // GoW (has other crazy formats, like ...030503050103) - if (regs.U32[0] == 0x00050102) - type = TYPE_STQRGBAXYZ2; - // TODO: common types with UV instead + if ((val >> 16) == 0x04) + { + if ((val & 0xFF00) == 0x0100) + type = (val >= 0x040102 && val <= 0x040103) ? (TYPE_STQRGBAXYZF2 + (val & 0xf) - 2) : type; + else if (val == 0x00040301) + type = TYPE_RGBAUVXYZF2; + + } + else if ((val >> 16) == 0x05) + { + type = (val >= 0x050102 && val <= 0x050103) ? (TYPE_STQRGBAXYZ2 + (val & 0xf) - 2) : type; + } break; case 4: + if (val == 0x04030403) + { + type = TYPE_UVXYZF2; + nreg = 2; + nloop *= 2; + } + else if (val == 0x05030503) + { + type = TYPE_UVXYZ2; + nreg = 2; + nloop *= 2; + } break; case 5: break; case 6: + if (val == 0x0000040103040103ULL) + { + type = TYPE_UVRGBAXYZF2; + nreg = 3; + nloop *= 2; + } + else if (val == 0x0000040301040301ULL) + { + type = TYPE_RGBAUVXYZF2; + nreg = 3; + nloop *= 2; + } + else if (val == 0x0000050103050103ULL) + { + type = TYPE_UVRGBAXYZ2; + nreg = 3; + nloop *= 2; + } break; case 7: break; @@ -1189,7 +1250,7 @@ struct alignas(32) GIFPath break; case 9: // ffx - if (regs.U32[0] == 0x02040102 && regs.U32[1] == 0x01020401 && regs.U32[2] == 0x00000004) + if (val == 0x0102040102040102ULL && regs.U32[2] == 0x00000004) { type = TYPE_STQRGBAXYZF2; nreg = 3; @@ -1202,7 +1263,7 @@ struct alignas(32) GIFPath break; case 12: // dq8 (not many, mostly 040102) - if (regs.U32[0] == 0x02040102 && regs.U32[1] == 0x01020401 && regs.U32[2] == 0x04010204) + if (val == 0x0102040102040102ULL && regs.U32[2] == 0x04010204) { type = TYPE_STQRGBAXYZF2; nreg = 3; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 6cacd46779277..ae8839d2cb867 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -210,7 +210,16 @@ void GSState::SetPrimHandlers() m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2; \ m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2; \ m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2; \ - m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2; + m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2; \ + m_fpGIFPackedRegHandlerUVRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerUVRGBAXYZ2; \ + m_fpGIFPackedRegHandlerRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerRGBAXYZ2; \ + m_fpGIFPackedRegHandlerRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerRGBAXYZF2; \ + m_fpGIFPackedRegHandlerSTQXYZ2[P] = &GSState::GIFPackedRegHandlerSTQXYZ2; \ + m_fpGIFPackedRegHandlerSTQXYZF2[P] = &GSState::GIFPackedRegHandlerSTQXYZF2; \ + m_fpGIFPackedRegHandlerUVXYZ2[P] = &GSState::GIFPackedRegHandlerUVXYZ2; \ + m_fpGIFPackedRegHandlerUVXYZF2[P] = &GSState::GIFPackedRegHandlerUVXYZF2; \ + m_fpGIFPackedRegHandlerRGBAUVXYZF2[P] = &GSState::GIFPackedRegHandlerRGBAUVXYZF2; \ + m_fpGIFPackedRegHandlerUVRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerUVRGBAXYZF2; SetHandlerXYZ(GS_POINTLIST, true, false); SetHandlerXYZ(GS_LINELIST, auto_flush, index_swap); @@ -595,33 +604,37 @@ void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r) template void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r) { - if (!adc || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) + const bool skip = adc || r->XYZF2.Skip(); + + if (!skip || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) CheckFlushes(); - GSVector4i xy = GSVector4i::loadl(&r->U64[0]); - GSVector4i zf = GSVector4i::loadl(&r->U64[1]); + GSVector4i xy = GSVector4i::loadnt(r); + GSVector4i zf = xy.zwzw(); xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); m_v.m[1] = xy.upl32(zf); - VertexKick(adc ? 1 : r->XYZF2.Skip()); + VertexKick(skip); } template void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r) { - if (!adc || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) + const bool skip = adc || r->XYZ2.Skip(); + + if (!skip || GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) CheckFlushes(); - const GSVector4i xy = GSVector4i::loadl(&r->U64[0]); - const GSVector4i z = GSVector4i::loadl(&r->U64[1]); + const GSVector4i xy = GSVector4i::loadnt(r); + const GSVector4i z = xy.zzzz(); const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); - VertexKick(adc ? 1 : r->XYZ2.Skip()); + VertexKick(skip); } void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r) @@ -643,40 +656,64 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3 { ASSERT(size > 0 && size % 3 == 0); - bool flushes_checked = false; + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); + const GSVector4i ffffff_mask = GSVector4i::x00ffffff().upl32(ff_mask); - if (GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) + while (r < r_end) { - flushes_checked = true; - CheckFlushes(); + const GSVector4i st = GSVector4i::loadnt(&r[0]); + GSVector4i q = st.zzzz(); + const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16(); + + q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ + + m_v.m[0] = st.upl64(rgba.upl32(q)); + + GSVector4i xy = GSVector4i::loadnt(&r[2]); + GSVector4i zf = xy.zwzw(); + xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); + zf = zf.srl32(4) & ffffff_mask; + + m_v.m[1] = xy.upl32(zf); + + VertexKick(r[2].XYZF2.Skip()); + + r += 3; } + m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time +} + +template +void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 3 == 0); + + CheckFlushes(); + const GIFPackedReg* RESTRICT r_end = r + size; + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); while (r < r_end) { - const GSVector4i st = GSVector4i::loadl(&r[0].U64[0]); - GSVector4i q = GSVector4i::loadl(&r[0].U64[1]); - const GSVector4i rgba = (GSVector4i::load(&r[1]) & GSVector4i::x000000ff()).ps32().pu16(); + const GSVector4i st = GSVector4i::loadnt(&r[0]); + GSVector4i q = st.zzzz(); + const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16(); q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ - m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one + m_v.m[0] = st.upl64(rgba.upl32(q)); - GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]); - GSVector4i zf = GSVector4i::loadl(&r[2].U64[1]); - xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); - zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); + const GSVector4i xy = GSVector4i::loadnt(&r[2]); + const GSVector4i z = xy.zzzz(); + const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); - m_v.m[1] = xy.upl32(zf); // TODO: only store the last one + m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); - const bool skip = r[2].XYZF2.Skip(); - if (!flushes_checked && !skip) - { - flushes_checked = true; - CheckFlushes(); - } - VertexKick(skip); + VertexKick(r[2].XYZ2.Skip()); r += 3; } @@ -685,47 +722,286 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3 } template -void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size) +void GSState::GIFPackedRegHandlerUVRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size) { ASSERT(size > 0 && size % 3 == 0); - bool flushes_checked = false; - if (GSUtil::GetPrimClass(m_prev_env.PRIM.PRIM) != GSUtil::GetPrimClass(m_env.PRIM.PRIM) || (m_dirty_gs_regs & (1 << DIRTY_REG_XYOFFSET))) + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + + const GSVector4i st = GSVector4i::loadl(&m_v.ST); + const GSVector4i q = GSVector4i::loadl(&m_v.RGBAQ.U32[1]).blend8(GSVector4i::cast(GSVector4::m_one), GSVector4i::loadl(&m_v.RGBAQ.U32[1]) == GSVector4i::zero()); + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); + constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff); + + while (r < r_end) { - flushes_checked = true; - CheckFlushes(); + const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16(); + + m_v.m[0] = st.upl64(rgba.upl32(q)); + + const GSVector4i xy = GSVector4i::loadnt(&r[2]); + const GSVector4i z = xy.zzzz(); + const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); + + const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; + + m_v.m[1] = xyz.upl64(uv.ps32(uv)); + + VertexKick(r[2].XYZ2.Skip()); + + r += 3; } +} + +template +void GSState::GIFPackedRegHandlerRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 2 == 0); + + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + + const GSVector4i st = GSVector4i::loadl(&m_v.ST); + const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); + + while (r < r_end) + { + const GSVector4i rgba = (GSVector4i::load(&r[0]) & ff_mask).ps32().pu16(); + + m_v.m[0] = st.upl64(rgba.upl32(q)); + + const GSVector4i xy = GSVector4i::loadnt(&r[1]); + const GSVector4i z = xy.zzzz(); + const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); + + m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); + + VertexKick(r[1].XYZ2.Skip()); + + r += 2; + } +} + +template +void GSState::GIFPackedRegHandlerRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 2 == 0); + + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + + const GSVector4i st = GSVector4i::loadl(&m_v.ST); + const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); + const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask); + + while (r < r_end) + { + const GSVector4i rgba = (GSVector4i::load(&r[0]) & ff_mask).ps32().pu16(); + + m_v.m[0] = st.upl64(rgba.upl32(q)); + + GSVector4i xy = GSVector4i::loadnt(&r[1]); + GSVector4i zf = xy.zwzw(); + xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); + zf = zf.srl32(4) & ffffff_mask; + + m_v.m[1] = xy.upl32(zf); + + VertexKick(r[1].XYZF2.Skip()); + + r += 2; + } +} + +template +void GSState::GIFPackedRegHandlerUVRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 3 == 0); + + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + + const GSVector4i st = GSVector4i::loadl(&m_v.ST); + const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); + constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff); + const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask); + + while (r < r_end) + { + const GSVector4i rgba = (GSVector4i::load(&r[1]) & ff_mask).ps32().pu16(); + + m_v.m[0] = st.upl64(rgba.upl32(q)); + + GSVector4i xy = GSVector4i::loadnt(&r[2]); + GSVector4i zf = xy.zwzw(); + + const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; + + xy = xy.upl16(xy.srl<4>()).upl32(uv.ps32(uv)); + zf = zf.srl32(4) & ffffff_mask; + + m_v.m[1] = xy.upl32(zf); + + VertexKick(r[2].XYZF2.Skip()); + + r += 3; + } +} + +template +void GSState::GIFPackedRegHandlerRGBAUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 3 == 0); + + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + + const GSVector4i st = GSVector4i::loadl(&m_v.ST); + const GSVector4i q = GSVector4i(m_v.RGBAQ.U32[1]); + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); + constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff); + const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask); + + while (r < r_end) + { + const GSVector4i rgba = (GSVector4i::load(&r[0]) & ff_mask).ps32().pu16(); + + m_v.m[0] = st.upl64(rgba.upl32(q)); + + GSVector4i xy = GSVector4i::loadnt(&r[2]); + GSVector4i zf = xy.zwzw(); + + const GSVector4i uv = GSVector4i::loadl(&r[1]) & mask_3fff; + + xy = xy.upl16(xy.srl<4>()).upl32(uv.ps32(uv)); + zf = zf.srl32(4) & ffffff_mask; + + m_v.m[1] = xy.upl32(zf); + + VertexKick(r[2].XYZF2.Skip()); + + r += 3; + } +} + +template +void GSState::GIFPackedRegHandlerSTQXYZ2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 2 == 0); + + CheckFlushes(); const GIFPackedReg* RESTRICT r_end = r + size; while (r < r_end) { const GSVector4i st = GSVector4i::loadl(&r[0].U64[0]); - GSVector4i q = GSVector4i::loadl(&r[0].U64[1]); - const GSVector4i rgba = (GSVector4i::load(&r[1]) & GSVector4i::x000000ff()).ps32().pu16(); + m_v.ST.U64 = st.U64[0]; - q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ + const GSVector4i xy = GSVector4i::loadnt(&r[1]); + const GSVector4i z = xy.zzzz(); + const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); + + m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); + + VertexKick(r[1].XYZ2.Skip()); + + r += 2; + } + + m_q = r[-2].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time +} + +template +void GSState::GIFPackedRegHandlerSTQXYZF2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 2 == 0); + + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + constexpr GSVector4i ff_mask = GSVector4i::cxpr(0x000000ff); + const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(ff_mask); - m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one + while (r < r_end) + { + const GSVector4i st = GSVector4i::loadl(&r[0].U64[0]); + m_v.ST.U64 = st.U64[0]; + + GSVector4i xy = GSVector4i::loadnt(&r[1]); + GSVector4i zf = xy.zwzw(); + xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); + zf = zf.srl32(4) & ffffff_mask; + + m_v.m[1] = xy.upl32(zf); + + VertexKick(r[1].XYZF2.Skip()); + + r += 2; + } + + m_q = r[-2].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time +} + +template +void GSState::GIFPackedRegHandlerUVXYZ2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 2 == 0); + + CheckFlushes(); - const GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]); - const GSVector4i z = GSVector4i::loadl(&r[2].U64[1]); + const GIFPackedReg* RESTRICT r_end = r + size; + constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff); + + while (r < r_end) + { + GSVector4i xy = GSVector4i::loadnt(&r[1]); + GSVector4i z = xy.zzzz(); const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); + const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; - m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one + m_v.m[1] = xyz.upl64(uv.ps32(uv)); - const bool skip = r[2].XYZF2.Skip(); - if (!flushes_checked && !skip) - { - flushes_checked = true; - CheckFlushes(); - } - VertexKick(skip); + VertexKick(r[1].XYZ2.Skip()); - r += 3; + r += 2; } +} - m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time +template +void GSState::GIFPackedRegHandlerUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size) +{ + ASSERT(size > 0 && size % 2 == 0); + + CheckFlushes(); + + const GIFPackedReg* RESTRICT r_end = r + size; + constexpr GSVector4i mask_3fff = GSVector4i::cxpr(0x00003fff); + const GSVector4i ffffff_mask = GSVector4i::cxpr(0x00ffffff).upl32(GSVector4i::cxpr(0x000000ff)); + + while (r < r_end) + { + GSVector4i xy = GSVector4i::loadnt(&r[1]); + GSVector4i zf = xy.zwzw(); + const GSVector4i uv = GSVector4i::loadl(&r[0]) & mask_3fff; + xy = xy.upl16(xy.srl<4>()).upl32(uv.ps32(uv)); + zf = zf.srl32(4) & ffffff_mask; + + m_v.m[1] = xy.upl32(zf); + + VertexKick(r[1].XYZF2.Skip()); + + r += 2; + } } void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size) @@ -2328,6 +2604,8 @@ void GSState::Transfer(const u8* mem, u32 size) if (path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED) ApplyPRIM(path.tag.PRIM); } + + continue; } else { @@ -2358,7 +2636,7 @@ void GSState::Transfer(const u8* mem, u32 size) switch (path.type) { - case GIFPath::TYPE_UNKNOWN: + case PATH_TYPE::TYPE_UNKNOWN: { u32 reg = 0; @@ -2372,29 +2650,19 @@ void GSState::Transfer(const u8* mem, u32 size) } while (--total > 0); } break; - case GIFPath::TYPE_ADONLY: // very common + case PATH_TYPE::TYPE_ADONLY: // very common do { (this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR & 0x7F])(&((GIFPackedReg*)mem)->r); mem += sizeof(GIFPackedReg); } while (--total > 0); - - break; - case GIFPath::TYPE_STQRGBAXYZF2: // majority of the vertices are formatted like this - (this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2])((GIFPackedReg*)mem, total); - - mem += total * sizeof(GIFPackedReg); - - break; - case GIFPath::TYPE_STQRGBAXYZ2: - (this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2])((GIFPackedReg*)mem, total); + break; + default: + (this->*m_fpGIFPackedRegHandlersC[path.type])((GIFPackedReg*)mem, total); mem += total * sizeof(GIFPackedReg); - - break; - default: - __assume(0); + break; } path.nloop = 0; @@ -2744,6 +3012,15 @@ void GSState::UpdateVertexKick() m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim]; m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = m_fpGIFPackedRegHandlerSTQRGBAXYZ2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_UVRGBAXYZ2] = m_fpGIFPackedRegHandlerUVRGBAXYZ2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_RGBAXYZ2] = m_fpGIFPackedRegHandlerRGBAXYZ2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_RGBAXYZF2] = m_fpGIFPackedRegHandlerRGBAXYZF2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_STQXYZ2] = m_fpGIFPackedRegHandlerSTQXYZ2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_STQXYZF2] = m_fpGIFPackedRegHandlerSTQXYZF2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_RGBAUVXYZF2] = m_fpGIFPackedRegHandlerRGBAUVXYZF2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_UVRGBAXYZF2] = m_fpGIFPackedRegHandlerUVRGBAXYZF2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_UVXYZ2] = m_fpGIFPackedRegHandlerUVXYZ2[prim]; + m_fpGIFPackedRegHandlersC[GIF_REG_UVXYZF2] = m_fpGIFPackedRegHandlerUVXYZF2[prim]; } void GSState::GrowVertexBuffer() diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 9e3fd1de29985..5766542e4c135 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -63,12 +63,31 @@ class GSState : public GSAlignedClass<32> typedef void (GSState::*GIFPackedRegHandlerC)(const GIFPackedReg* RESTRICT r, u32 size); - GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[2] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[11] = {}; GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8] = {}; GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZ2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVRGBAXYZ2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerRGBAXYZ2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerRGBAXYZF2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQXYZ2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQXYZF2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVXYZ2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVXYZF2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerRGBAUVXYZF2[8] = {}; + GIFPackedRegHandlerC m_fpGIFPackedRegHandlerUVRGBAXYZF2[8] = {}; template void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size); template void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerUVRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerSTQXYZ2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerSTQXYZF2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerUVXYZ2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerRGBAUVXYZF2(const GIFPackedReg* RESTRICT r, u32 size); + template void GIFPackedRegHandlerUVRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size); + void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size); template void ApplyTEX0(GIFRegTEX0& TEX0);