diff --git a/pcsx2/GS/GSClut.cpp b/pcsx2/GS/GSClut.cpp index 468129cf9cbf14..99f1fd7202bbcd 100644 --- a/pcsx2/GS/GSClut.cpp +++ b/pcsx2/GS/GSClut.cpp @@ -837,8 +837,8 @@ void GSClut::Expand16(const u16* RESTRICT src, u32* RESTRICT dst, int w, const G c = s[i]; cl = c.upl16(c); ch = c.uph16(c); - d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)); - d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)); + d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16<15>()); + d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16<15>()); } } else @@ -848,8 +848,8 @@ void GSClut::Expand16(const u16* RESTRICT src, u32* RESTRICT dst, int w, const G c = s[i]; cl = c.upl16(c); ch = c.uph16(c); - d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero()); - d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)).andnot(ch == GSVector4i::zero()); + d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16<15>()).andnot(cl == GSVector4i::zero()); + d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16<15>()).andnot(ch == GSVector4i::zero()); } } } diff --git a/pcsx2/GS/GSDrawingContext.cpp b/pcsx2/GS/GSDrawingContext.cpp index 731001560d4687..f83e76beb039d9 100644 --- a/pcsx2/GS/GSDrawingContext.cpp +++ b/pcsx2/GS/GSDrawingContext.cpp @@ -97,7 +97,7 @@ void GSDrawingContext::UpdateScissor() scissor.in = rscissor + GSVector4i::cxpr(0, 0, 1, 1); // Fixed-point scissor min/max, used for rejecting primitives which are entirely outside. 
- scissor.cull = rscissor.sll32(4); + scissor.cull = rscissor.sll32<4>(); // Offset applied to vertices for culling, zw is for native resolution culling // We want to round subpixels down, because at least one pixel gets filled per scanline. diff --git a/pcsx2/GS/GSRegs.h b/pcsx2/GS/GSRegs.h index 6ec5e8a6e1fa06..59ee0967900437 100644 --- a/pcsx2/GS/GSRegs.h +++ b/pcsx2/GS/GSRegs.h @@ -262,7 +262,6 @@ union name \ #define REG128_SET(name) \ union name \ { \ - __m128i m128; \ u64 U64[2]; \ u32 U32[4]; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 58a2d215467592..0fcaaeb847df24 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -594,7 +594,7 @@ void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r) GSVector4i zf = xy.zwzw(); xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); - zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); + zf = zf.srl32<4>() & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); m_v.m[1] = xy.upl32(zf); @@ -654,7 +654,7 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3 GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]); GSVector4i zf = GSVector4i::loadl(&r[2].U64[1]); xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); - zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); + zf = zf.srl32<4>() & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); m_v.m[1] = xy.upl32(zf); // TODO: only store the last one @@ -784,7 +784,7 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r) const GSVector4i xyzf = GSVector4i::loadl(&r->XYZF); const GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff())); - const GSVector4i uvf = GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32(24).srl<4>()); + const GSVector4i uvf = GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32<24>().srl<4>()); m_v.m[1] = xyz.upl64(uvf); @@ -3363,7 +3363,7 @@ __forceinline void GSState::VertexKick(u32 skip) // 
integer coordinates for culling at native resolution, and the fixed point for all others. The XY offset has to be // applied, then we split it into the fixed/integer portions. const GSVector4i xy_ofs = new_v1.xxxx().u16to32().sub32(m_xyof); - const GSVector4i xy = xy_ofs.blend32<12>(xy_ofs.sra32(4)); + const GSVector4i xy = xy_ofs.blend32<12>(xy_ofs.sra32<4>()); m_vertex.xy[xy_tail & 3] = xy; // Backup head for triangle fans so we can read it later, otherwise it'll get lost after the 4th vertex. diff --git a/pcsx2/GS/GSVector4.h b/pcsx2/GS/GSVector4.h index 7612d9a543ff77..9a51450da0d8b5 100644 --- a/pcsx2/GS/GSVector4.h +++ b/pcsx2/GS/GSVector4.h @@ -153,7 +153,7 @@ class alignas(16) GSVector4 { GSVector4i v((int)u); - *this = GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31))); + *this = GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32<31>())); } __forceinline explicit GSVector4(const GSVector4i& v); @@ -643,7 +643,7 @@ GSVector.h:2973:15: error: shadows template parm 'int i' { GSVector4i v = GSVector4i::load((int)u); - return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31))); + return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32<31>())); } template diff --git a/pcsx2/GS/GSVector4i.h b/pcsx2/GS/GSVector4i.h index 7ea2a3b5d9ca0d..dfc2683842b37c 100644 --- a/pcsx2/GS/GSVector4i.h +++ b/pcsx2/GS/GSVector4i.h @@ -706,14 +706,16 @@ class alignas(16) GSVector4i return GSVector4i(_mm_slli_si128(m, i)); } - __forceinline GSVector4i sra16(int i) const + template + __forceinline GSVector4i sra16() const { return GSVector4i(_mm_srai_epi16(m, i)); } - __forceinline GSVector4i sra16(__m128i i) const + template + __forceinline GSVector4i sra32() const { - return GSVector4i(_mm_sra_epi16(m, i)); + return GSVector4i(_mm_srai_epi32(m, i)); } __forceinline GSVector4i sra32(int i) const @@ -721,11 +723,6 @@ class alignas(16) GSVector4i return GSVector4i(_mm_srai_epi32(m, i)); } - __forceinline GSVector4i sra32(__m128i i) const - { - return 
GSVector4i(_mm_sra_epi32(m, i)); - } - #if _M_SSE >= 0x501 __forceinline GSVector4i srav32(const GSVector4i& v) const { @@ -733,26 +730,18 @@ class alignas(16) GSVector4i } #endif - __forceinline GSVector4i sll16(int i) const + template + __forceinline GSVector4i sll16() const { return GSVector4i(_mm_slli_epi16(m, i)); } - __forceinline GSVector4i sll16(__m128i i) const - { - return GSVector4i(_mm_sll_epi16(m, i)); - } - - __forceinline GSVector4i sll32(int i) const + template + __forceinline GSVector4i sll32() const { return GSVector4i(_mm_slli_epi32(m, i)); } - __forceinline GSVector4i sll32(__m128i i) const - { - return GSVector4i(_mm_sll_epi32(m, i)); - } - #if _M_SSE >= 0x501 __forceinline GSVector4i sllv32(const GSVector4i& v) const { @@ -760,14 +749,16 @@ class alignas(16) GSVector4i } #endif - __forceinline GSVector4i sll64(int i) const + template + __forceinline GSVector4i sll64() const { return GSVector4i(_mm_slli_epi64(m, i)); } - __forceinline GSVector4i sll64(__m128i i) const + template + __forceinline GSVector4i srl16() const { - return GSVector4i(_mm_sll_epi64(m, i)); + return GSVector4i(_mm_srli_epi16(m, i)); } __forceinline GSVector4i srl16(int i) const @@ -775,9 +766,10 @@ class alignas(16) GSVector4i return GSVector4i(_mm_srli_epi16(m, i)); } - __forceinline GSVector4i srl16(__m128i i) const + template + __forceinline GSVector4i srl32() const { - return GSVector4i(_mm_srl_epi16(m, i)); + return GSVector4i(_mm_srli_epi32(m, i)); } __forceinline GSVector4i srl32(int i) const @@ -785,11 +777,6 @@ class alignas(16) GSVector4i return GSVector4i(_mm_srli_epi32(m, i)); } - __forceinline GSVector4i srl32(__m128i i) const - { - return GSVector4i(_mm_srl_epi32(m, i)); - } - #if _M_SSE >= 0x501 __forceinline GSVector4i srlv32(const GSVector4i& v) const { @@ -797,16 +784,12 @@ class alignas(16) GSVector4i } #endif - __forceinline GSVector4i srl64(int i) const + template + __forceinline GSVector4i srl64() const { return GSVector4i(_mm_srli_epi64(m, i)); } - 
__forceinline GSVector4i srl64(__m128i i) const - { - return GSVector4i(_mm_srl_epi64(m, i)); - } - __forceinline GSVector4i add8(const GSVector4i& v) const { return GSVector4i(_mm_add_epi8(m, v.m)); @@ -945,7 +928,7 @@ class alignas(16) GSVector4i { // (a - this) * f >> 4 + this (a, this: 8-bit, f: 4-bit) - return add16(a.sub16(*this).mul16l(f).sra16(4)); + return add16(a.sub16(*this).mul16l(f).sra16<4>()); } template @@ -957,7 +940,7 @@ class alignas(16) GSVector4i return mul16hrs(f); } - return sll16(shift + 1).mul16hs(f); + return sll16().mul16hs(f); } __forceinline bool eq(const GSVector4i& v) const @@ -1988,199 +1971,199 @@ class alignas(16) GSVector4i __forceinline static GSVector4i xffffffff() { return zero() == zero(); } - __forceinline static GSVector4i x00000001() { return xffffffff().srl32(31); } - __forceinline static GSVector4i x00000003() { return xffffffff().srl32(30); } - __forceinline static GSVector4i x00000007() { return xffffffff().srl32(29); } - __forceinline static GSVector4i x0000000f() { return xffffffff().srl32(28); } - __forceinline static GSVector4i x0000001f() { return xffffffff().srl32(27); } - __forceinline static GSVector4i x0000003f() { return xffffffff().srl32(26); } - __forceinline static GSVector4i x0000007f() { return xffffffff().srl32(25); } - __forceinline static GSVector4i x000000ff() { return xffffffff().srl32(24); } - __forceinline static GSVector4i x000001ff() { return xffffffff().srl32(23); } - __forceinline static GSVector4i x000003ff() { return xffffffff().srl32(22); } - __forceinline static GSVector4i x000007ff() { return xffffffff().srl32(21); } - __forceinline static GSVector4i x00000fff() { return xffffffff().srl32(20); } - __forceinline static GSVector4i x00001fff() { return xffffffff().srl32(19); } - __forceinline static GSVector4i x00003fff() { return xffffffff().srl32(18); } - __forceinline static GSVector4i x00007fff() { return xffffffff().srl32(17); } - __forceinline static GSVector4i x0000ffff() { return 
xffffffff().srl32(16); } - __forceinline static GSVector4i x0001ffff() { return xffffffff().srl32(15); } - __forceinline static GSVector4i x0003ffff() { return xffffffff().srl32(14); } - __forceinline static GSVector4i x0007ffff() { return xffffffff().srl32(13); } - __forceinline static GSVector4i x000fffff() { return xffffffff().srl32(12); } - __forceinline static GSVector4i x001fffff() { return xffffffff().srl32(11); } - __forceinline static GSVector4i x003fffff() { return xffffffff().srl32(10); } - __forceinline static GSVector4i x007fffff() { return xffffffff().srl32( 9); } - __forceinline static GSVector4i x00ffffff() { return xffffffff().srl32( 8); } - __forceinline static GSVector4i x01ffffff() { return xffffffff().srl32( 7); } - __forceinline static GSVector4i x03ffffff() { return xffffffff().srl32( 6); } - __forceinline static GSVector4i x07ffffff() { return xffffffff().srl32( 5); } - __forceinline static GSVector4i x0fffffff() { return xffffffff().srl32( 4); } - __forceinline static GSVector4i x1fffffff() { return xffffffff().srl32( 3); } - __forceinline static GSVector4i x3fffffff() { return xffffffff().srl32( 2); } - __forceinline static GSVector4i x7fffffff() { return xffffffff().srl32( 1); } - - __forceinline static GSVector4i x80000000() { return xffffffff().sll32(31); } - __forceinline static GSVector4i xc0000000() { return xffffffff().sll32(30); } - __forceinline static GSVector4i xe0000000() { return xffffffff().sll32(29); } - __forceinline static GSVector4i xf0000000() { return xffffffff().sll32(28); } - __forceinline static GSVector4i xf8000000() { return xffffffff().sll32(27); } - __forceinline static GSVector4i xfc000000() { return xffffffff().sll32(26); } - __forceinline static GSVector4i xfe000000() { return xffffffff().sll32(25); } - __forceinline static GSVector4i xff000000() { return xffffffff().sll32(24); } - __forceinline static GSVector4i xff800000() { return xffffffff().sll32(23); } - __forceinline static GSVector4i xffc00000() { 
return xffffffff().sll32(22); } - __forceinline static GSVector4i xffe00000() { return xffffffff().sll32(21); } - __forceinline static GSVector4i xfff00000() { return xffffffff().sll32(20); } - __forceinline static GSVector4i xfff80000() { return xffffffff().sll32(19); } - __forceinline static GSVector4i xfffc0000() { return xffffffff().sll32(18); } - __forceinline static GSVector4i xfffe0000() { return xffffffff().sll32(17); } - __forceinline static GSVector4i xffff0000() { return xffffffff().sll32(16); } - __forceinline static GSVector4i xffff8000() { return xffffffff().sll32(15); } - __forceinline static GSVector4i xffffc000() { return xffffffff().sll32(14); } - __forceinline static GSVector4i xffffe000() { return xffffffff().sll32(13); } - __forceinline static GSVector4i xfffff000() { return xffffffff().sll32(12); } - __forceinline static GSVector4i xfffff800() { return xffffffff().sll32(11); } - __forceinline static GSVector4i xfffffc00() { return xffffffff().sll32(10); } - __forceinline static GSVector4i xfffffe00() { return xffffffff().sll32( 9); } - __forceinline static GSVector4i xffffff00() { return xffffffff().sll32( 8); } - __forceinline static GSVector4i xffffff80() { return xffffffff().sll32( 7); } - __forceinline static GSVector4i xffffffc0() { return xffffffff().sll32( 6); } - __forceinline static GSVector4i xffffffe0() { return xffffffff().sll32( 5); } - __forceinline static GSVector4i xfffffff0() { return xffffffff().sll32( 4); } - __forceinline static GSVector4i xfffffff8() { return xffffffff().sll32( 3); } - __forceinline static GSVector4i xfffffffc() { return xffffffff().sll32( 2); } - __forceinline static GSVector4i xfffffffe() { return xffffffff().sll32( 1); } - - __forceinline static GSVector4i x0001() { return xffffffff().srl16(15); } - __forceinline static GSVector4i x0003() { return xffffffff().srl16(14); } - __forceinline static GSVector4i x0007() { return xffffffff().srl16(13); } - __forceinline static GSVector4i x000f() { return 
xffffffff().srl16(12); } - __forceinline static GSVector4i x001f() { return xffffffff().srl16(11); } - __forceinline static GSVector4i x003f() { return xffffffff().srl16(10); } - __forceinline static GSVector4i x007f() { return xffffffff().srl16( 9); } - __forceinline static GSVector4i x00ff() { return xffffffff().srl16( 8); } - __forceinline static GSVector4i x01ff() { return xffffffff().srl16( 7); } - __forceinline static GSVector4i x03ff() { return xffffffff().srl16( 6); } - __forceinline static GSVector4i x07ff() { return xffffffff().srl16( 5); } - __forceinline static GSVector4i x0fff() { return xffffffff().srl16( 4); } - __forceinline static GSVector4i x1fff() { return xffffffff().srl16( 3); } - __forceinline static GSVector4i x3fff() { return xffffffff().srl16( 2); } - __forceinline static GSVector4i x7fff() { return xffffffff().srl16( 1); } - - __forceinline static GSVector4i x8000() { return xffffffff().sll16(15); } - __forceinline static GSVector4i xc000() { return xffffffff().sll16(14); } - __forceinline static GSVector4i xe000() { return xffffffff().sll16(13); } - __forceinline static GSVector4i xf000() { return xffffffff().sll16(12); } - __forceinline static GSVector4i xf800() { return xffffffff().sll16(11); } - __forceinline static GSVector4i xfc00() { return xffffffff().sll16(10); } - __forceinline static GSVector4i xfe00() { return xffffffff().sll16( 9); } - __forceinline static GSVector4i xff00() { return xffffffff().sll16( 8); } - __forceinline static GSVector4i xff80() { return xffffffff().sll16( 7); } - __forceinline static GSVector4i xffc0() { return xffffffff().sll16( 6); } - __forceinline static GSVector4i xffe0() { return xffffffff().sll16( 5); } - __forceinline static GSVector4i xfff0() { return xffffffff().sll16( 4); } - __forceinline static GSVector4i xfff8() { return xffffffff().sll16( 3); } - __forceinline static GSVector4i xfffc() { return xffffffff().sll16( 2); } - __forceinline static GSVector4i xfffe() { return xffffffff().sll16( 
1); } + __forceinline static GSVector4i x00000001() { return xffffffff().srl32<31>(); } + __forceinline static GSVector4i x00000003() { return xffffffff().srl32<30>(); } + __forceinline static GSVector4i x00000007() { return xffffffff().srl32<29>(); } + __forceinline static GSVector4i x0000000f() { return xffffffff().srl32<28>(); } + __forceinline static GSVector4i x0000001f() { return xffffffff().srl32<27>(); } + __forceinline static GSVector4i x0000003f() { return xffffffff().srl32<26>(); } + __forceinline static GSVector4i x0000007f() { return xffffffff().srl32<25>(); } + __forceinline static GSVector4i x000000ff() { return xffffffff().srl32<24>(); } + __forceinline static GSVector4i x000001ff() { return xffffffff().srl32<23>(); } + __forceinline static GSVector4i x000003ff() { return xffffffff().srl32<22>(); } + __forceinline static GSVector4i x000007ff() { return xffffffff().srl32<21>(); } + __forceinline static GSVector4i x00000fff() { return xffffffff().srl32<20>(); } + __forceinline static GSVector4i x00001fff() { return xffffffff().srl32<19>(); } + __forceinline static GSVector4i x00003fff() { return xffffffff().srl32<18>(); } + __forceinline static GSVector4i x00007fff() { return xffffffff().srl32<17>(); } + __forceinline static GSVector4i x0000ffff() { return xffffffff().srl32<16>(); } + __forceinline static GSVector4i x0001ffff() { return xffffffff().srl32<15>(); } + __forceinline static GSVector4i x0003ffff() { return xffffffff().srl32<14>(); } + __forceinline static GSVector4i x0007ffff() { return xffffffff().srl32<13>(); } + __forceinline static GSVector4i x000fffff() { return xffffffff().srl32<12>(); } + __forceinline static GSVector4i x001fffff() { return xffffffff().srl32<11>(); } + __forceinline static GSVector4i x003fffff() { return xffffffff().srl32<10>(); } + __forceinline static GSVector4i x007fffff() { return xffffffff().srl32< 9>(); } + __forceinline static GSVector4i x00ffffff() { return xffffffff().srl32< 8>(); } + __forceinline static 
GSVector4i x01ffffff() { return xffffffff().srl32< 7>(); } + __forceinline static GSVector4i x03ffffff() { return xffffffff().srl32< 6>(); } + __forceinline static GSVector4i x07ffffff() { return xffffffff().srl32< 5>(); } + __forceinline static GSVector4i x0fffffff() { return xffffffff().srl32< 4>(); } + __forceinline static GSVector4i x1fffffff() { return xffffffff().srl32< 3>(); } + __forceinline static GSVector4i x3fffffff() { return xffffffff().srl32< 2>(); } + __forceinline static GSVector4i x7fffffff() { return xffffffff().srl32< 1>(); } + + __forceinline static GSVector4i x80000000() { return xffffffff().sll32<31>(); } + __forceinline static GSVector4i xc0000000() { return xffffffff().sll32<30>(); } + __forceinline static GSVector4i xe0000000() { return xffffffff().sll32<29>(); } + __forceinline static GSVector4i xf0000000() { return xffffffff().sll32<28>(); } + __forceinline static GSVector4i xf8000000() { return xffffffff().sll32<27>(); } + __forceinline static GSVector4i xfc000000() { return xffffffff().sll32<26>(); } + __forceinline static GSVector4i xfe000000() { return xffffffff().sll32<25>(); } + __forceinline static GSVector4i xff000000() { return xffffffff().sll32<24>(); } + __forceinline static GSVector4i xff800000() { return xffffffff().sll32<23>(); } + __forceinline static GSVector4i xffc00000() { return xffffffff().sll32<22>(); } + __forceinline static GSVector4i xffe00000() { return xffffffff().sll32<21>(); } + __forceinline static GSVector4i xfff00000() { return xffffffff().sll32<20>(); } + __forceinline static GSVector4i xfff80000() { return xffffffff().sll32<19>(); } + __forceinline static GSVector4i xfffc0000() { return xffffffff().sll32<18>(); } + __forceinline static GSVector4i xfffe0000() { return xffffffff().sll32<17>(); } + __forceinline static GSVector4i xffff0000() { return xffffffff().sll32<16>(); } + __forceinline static GSVector4i xffff8000() { return xffffffff().sll32<15>(); } + __forceinline static GSVector4i xffffc000() { 
return xffffffff().sll32<14>(); } + __forceinline static GSVector4i xffffe000() { return xffffffff().sll32<13>(); } + __forceinline static GSVector4i xfffff000() { return xffffffff().sll32<12>(); } + __forceinline static GSVector4i xfffff800() { return xffffffff().sll32<11>(); } + __forceinline static GSVector4i xfffffc00() { return xffffffff().sll32<10>(); } + __forceinline static GSVector4i xfffffe00() { return xffffffff().sll32< 9>(); } + __forceinline static GSVector4i xffffff00() { return xffffffff().sll32< 8>(); } + __forceinline static GSVector4i xffffff80() { return xffffffff().sll32< 7>(); } + __forceinline static GSVector4i xffffffc0() { return xffffffff().sll32< 6>(); } + __forceinline static GSVector4i xffffffe0() { return xffffffff().sll32< 5>(); } + __forceinline static GSVector4i xfffffff0() { return xffffffff().sll32< 4>(); } + __forceinline static GSVector4i xfffffff8() { return xffffffff().sll32< 3>(); } + __forceinline static GSVector4i xfffffffc() { return xffffffff().sll32< 2>(); } + __forceinline static GSVector4i xfffffffe() { return xffffffff().sll32< 1>(); } + + __forceinline static GSVector4i x0001() { return xffffffff().srl16<15>(); } + __forceinline static GSVector4i x0003() { return xffffffff().srl16<14>(); } + __forceinline static GSVector4i x0007() { return xffffffff().srl16<13>(); } + __forceinline static GSVector4i x000f() { return xffffffff().srl16<12>(); } + __forceinline static GSVector4i x001f() { return xffffffff().srl16<11>(); } + __forceinline static GSVector4i x003f() { return xffffffff().srl16<10>(); } + __forceinline static GSVector4i x007f() { return xffffffff().srl16< 9>(); } + __forceinline static GSVector4i x00ff() { return xffffffff().srl16< 8>(); } + __forceinline static GSVector4i x01ff() { return xffffffff().srl16< 7>(); } + __forceinline static GSVector4i x03ff() { return xffffffff().srl16< 6>(); } + __forceinline static GSVector4i x07ff() { return xffffffff().srl16< 5>(); } + __forceinline static GSVector4i 
x0fff() { return xffffffff().srl16< 4>(); } + __forceinline static GSVector4i x1fff() { return xffffffff().srl16< 3>(); } + __forceinline static GSVector4i x3fff() { return xffffffff().srl16< 2>(); } + __forceinline static GSVector4i x7fff() { return xffffffff().srl16< 1>(); } + + __forceinline static GSVector4i x8000() { return xffffffff().sll16<15>(); } + __forceinline static GSVector4i xc000() { return xffffffff().sll16<14>(); } + __forceinline static GSVector4i xe000() { return xffffffff().sll16<13>(); } + __forceinline static GSVector4i xf000() { return xffffffff().sll16<12>(); } + __forceinline static GSVector4i xf800() { return xffffffff().sll16<11>(); } + __forceinline static GSVector4i xfc00() { return xffffffff().sll16<10>(); } + __forceinline static GSVector4i xfe00() { return xffffffff().sll16< 9>(); } + __forceinline static GSVector4i xff00() { return xffffffff().sll16< 8>(); } + __forceinline static GSVector4i xff80() { return xffffffff().sll16< 7>(); } + __forceinline static GSVector4i xffc0() { return xffffffff().sll16< 6>(); } + __forceinline static GSVector4i xffe0() { return xffffffff().sll16< 5>(); } + __forceinline static GSVector4i xfff0() { return xffffffff().sll16< 4>(); } + __forceinline static GSVector4i xfff8() { return xffffffff().sll16< 3>(); } + __forceinline static GSVector4i xfffc() { return xffffffff().sll16< 2>(); } + __forceinline static GSVector4i xfffe() { return xffffffff().sll16< 1>(); } __forceinline static GSVector4i xffffffff(const GSVector4i& v) { return v == v; } - __forceinline static GSVector4i x00000001(const GSVector4i& v) { return xffffffff(v).srl32(31); } - __forceinline static GSVector4i x00000003(const GSVector4i& v) { return xffffffff(v).srl32(30); } - __forceinline static GSVector4i x00000007(const GSVector4i& v) { return xffffffff(v).srl32(29); } - __forceinline static GSVector4i x0000000f(const GSVector4i& v) { return xffffffff(v).srl32(28); } - __forceinline static GSVector4i x0000001f(const GSVector4i& v) { 
return xffffffff(v).srl32(27); } - __forceinline static GSVector4i x0000003f(const GSVector4i& v) { return xffffffff(v).srl32(26); } - __forceinline static GSVector4i x0000007f(const GSVector4i& v) { return xffffffff(v).srl32(25); } - __forceinline static GSVector4i x000000ff(const GSVector4i& v) { return xffffffff(v).srl32(24); } - __forceinline static GSVector4i x000001ff(const GSVector4i& v) { return xffffffff(v).srl32(23); } - __forceinline static GSVector4i x000003ff(const GSVector4i& v) { return xffffffff(v).srl32(22); } - __forceinline static GSVector4i x000007ff(const GSVector4i& v) { return xffffffff(v).srl32(21); } - __forceinline static GSVector4i x00000fff(const GSVector4i& v) { return xffffffff(v).srl32(20); } - __forceinline static GSVector4i x00001fff(const GSVector4i& v) { return xffffffff(v).srl32(19); } - __forceinline static GSVector4i x00003fff(const GSVector4i& v) { return xffffffff(v).srl32(18); } - __forceinline static GSVector4i x00007fff(const GSVector4i& v) { return xffffffff(v).srl32(17); } - __forceinline static GSVector4i x0000ffff(const GSVector4i& v) { return xffffffff(v).srl32(16); } - __forceinline static GSVector4i x0001ffff(const GSVector4i& v) { return xffffffff(v).srl32(15); } - __forceinline static GSVector4i x0003ffff(const GSVector4i& v) { return xffffffff(v).srl32(14); } - __forceinline static GSVector4i x0007ffff(const GSVector4i& v) { return xffffffff(v).srl32(13); } - __forceinline static GSVector4i x000fffff(const GSVector4i& v) { return xffffffff(v).srl32(12); } - __forceinline static GSVector4i x001fffff(const GSVector4i& v) { return xffffffff(v).srl32(11); } - __forceinline static GSVector4i x003fffff(const GSVector4i& v) { return xffffffff(v).srl32(10); } - __forceinline static GSVector4i x007fffff(const GSVector4i& v) { return xffffffff(v).srl32( 9); } - __forceinline static GSVector4i x00ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 8); } - __forceinline static GSVector4i x01ffffff(const GSVector4i& v) { 
return xffffffff(v).srl32( 7); } - __forceinline static GSVector4i x03ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 6); } - __forceinline static GSVector4i x07ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 5); } - __forceinline static GSVector4i x0fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 4); } - __forceinline static GSVector4i x1fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 3); } - __forceinline static GSVector4i x3fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 2); } - __forceinline static GSVector4i x7fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 1); } - - __forceinline static GSVector4i x80000000(const GSVector4i& v) { return xffffffff(v).sll32(31); } - __forceinline static GSVector4i xc0000000(const GSVector4i& v) { return xffffffff(v).sll32(30); } - __forceinline static GSVector4i xe0000000(const GSVector4i& v) { return xffffffff(v).sll32(29); } - __forceinline static GSVector4i xf0000000(const GSVector4i& v) { return xffffffff(v).sll32(28); } - __forceinline static GSVector4i xf8000000(const GSVector4i& v) { return xffffffff(v).sll32(27); } - __forceinline static GSVector4i xfc000000(const GSVector4i& v) { return xffffffff(v).sll32(26); } - __forceinline static GSVector4i xfe000000(const GSVector4i& v) { return xffffffff(v).sll32(25); } - __forceinline static GSVector4i xff000000(const GSVector4i& v) { return xffffffff(v).sll32(24); } - __forceinline static GSVector4i xff800000(const GSVector4i& v) { return xffffffff(v).sll32(23); } - __forceinline static GSVector4i xffc00000(const GSVector4i& v) { return xffffffff(v).sll32(22); } - __forceinline static GSVector4i xffe00000(const GSVector4i& v) { return xffffffff(v).sll32(21); } - __forceinline static GSVector4i xfff00000(const GSVector4i& v) { return xffffffff(v).sll32(20); } - __forceinline static GSVector4i xfff80000(const GSVector4i& v) { return xffffffff(v).sll32(19); } - __forceinline static GSVector4i xfffc0000(const GSVector4i& v) 
{ return xffffffff(v).sll32(18); } - __forceinline static GSVector4i xfffe0000(const GSVector4i& v) { return xffffffff(v).sll32(17); } - __forceinline static GSVector4i xffff0000(const GSVector4i& v) { return xffffffff(v).sll32(16); } - __forceinline static GSVector4i xffff8000(const GSVector4i& v) { return xffffffff(v).sll32(15); } - __forceinline static GSVector4i xffffc000(const GSVector4i& v) { return xffffffff(v).sll32(14); } - __forceinline static GSVector4i xffffe000(const GSVector4i& v) { return xffffffff(v).sll32(13); } - __forceinline static GSVector4i xfffff000(const GSVector4i& v) { return xffffffff(v).sll32(12); } - __forceinline static GSVector4i xfffff800(const GSVector4i& v) { return xffffffff(v).sll32(11); } - __forceinline static GSVector4i xfffffc00(const GSVector4i& v) { return xffffffff(v).sll32(10); } - __forceinline static GSVector4i xfffffe00(const GSVector4i& v) { return xffffffff(v).sll32( 9); } - __forceinline static GSVector4i xffffff00(const GSVector4i& v) { return xffffffff(v).sll32( 8); } - __forceinline static GSVector4i xffffff80(const GSVector4i& v) { return xffffffff(v).sll32( 7); } - __forceinline static GSVector4i xffffffc0(const GSVector4i& v) { return xffffffff(v).sll32( 6); } - __forceinline static GSVector4i xffffffe0(const GSVector4i& v) { return xffffffff(v).sll32( 5); } - __forceinline static GSVector4i xfffffff0(const GSVector4i& v) { return xffffffff(v).sll32( 4); } - __forceinline static GSVector4i xfffffff8(const GSVector4i& v) { return xffffffff(v).sll32( 3); } - __forceinline static GSVector4i xfffffffc(const GSVector4i& v) { return xffffffff(v).sll32( 2); } - __forceinline static GSVector4i xfffffffe(const GSVector4i& v) { return xffffffff(v).sll32( 1); } - - __forceinline static GSVector4i x0001(const GSVector4i& v) { return xffffffff(v).srl16(15); } - __forceinline static GSVector4i x0003(const GSVector4i& v) { return xffffffff(v).srl16(14); } - __forceinline static GSVector4i x0007(const GSVector4i& v) { return 
xffffffff(v).srl16(13); } - __forceinline static GSVector4i x000f(const GSVector4i& v) { return xffffffff(v).srl16(12); } - __forceinline static GSVector4i x001f(const GSVector4i& v) { return xffffffff(v).srl16(11); } - __forceinline static GSVector4i x003f(const GSVector4i& v) { return xffffffff(v).srl16(10); } - __forceinline static GSVector4i x007f(const GSVector4i& v) { return xffffffff(v).srl16( 9); } - __forceinline static GSVector4i x00ff(const GSVector4i& v) { return xffffffff(v).srl16( 8); } - __forceinline static GSVector4i x01ff(const GSVector4i& v) { return xffffffff(v).srl16( 7); } - __forceinline static GSVector4i x03ff(const GSVector4i& v) { return xffffffff(v).srl16( 6); } - __forceinline static GSVector4i x07ff(const GSVector4i& v) { return xffffffff(v).srl16( 5); } - __forceinline static GSVector4i x0fff(const GSVector4i& v) { return xffffffff(v).srl16( 4); } - __forceinline static GSVector4i x1fff(const GSVector4i& v) { return xffffffff(v).srl16( 3); } - __forceinline static GSVector4i x3fff(const GSVector4i& v) { return xffffffff(v).srl16( 2); } - __forceinline static GSVector4i x7fff(const GSVector4i& v) { return xffffffff(v).srl16( 1); } - - __forceinline static GSVector4i x8000(const GSVector4i& v) { return xffffffff(v).sll16(15); } - __forceinline static GSVector4i xc000(const GSVector4i& v) { return xffffffff(v).sll16(14); } - __forceinline static GSVector4i xe000(const GSVector4i& v) { return xffffffff(v).sll16(13); } - __forceinline static GSVector4i xf000(const GSVector4i& v) { return xffffffff(v).sll16(12); } - __forceinline static GSVector4i xf800(const GSVector4i& v) { return xffffffff(v).sll16(11); } - __forceinline static GSVector4i xfc00(const GSVector4i& v) { return xffffffff(v).sll16(10); } - __forceinline static GSVector4i xfe00(const GSVector4i& v) { return xffffffff(v).sll16( 9); } - __forceinline static GSVector4i xff00(const GSVector4i& v) { return xffffffff(v).sll16( 8); } - __forceinline static GSVector4i xff80(const 
GSVector4i& v) { return xffffffff(v).sll16( 7); } - __forceinline static GSVector4i xffc0(const GSVector4i& v) { return xffffffff(v).sll16( 6); } - __forceinline static GSVector4i xffe0(const GSVector4i& v) { return xffffffff(v).sll16( 5); } - __forceinline static GSVector4i xfff0(const GSVector4i& v) { return xffffffff(v).sll16( 4); } - __forceinline static GSVector4i xfff8(const GSVector4i& v) { return xffffffff(v).sll16( 3); } - __forceinline static GSVector4i xfffc(const GSVector4i& v) { return xffffffff(v).sll16( 2); } - __forceinline static GSVector4i xfffe(const GSVector4i& v) { return xffffffff(v).sll16( 1); } + __forceinline static GSVector4i x00000001(const GSVector4i& v) { return xffffffff(v).srl32<31>(); } + __forceinline static GSVector4i x00000003(const GSVector4i& v) { return xffffffff(v).srl32<30>(); } + __forceinline static GSVector4i x00000007(const GSVector4i& v) { return xffffffff(v).srl32<29>(); } + __forceinline static GSVector4i x0000000f(const GSVector4i& v) { return xffffffff(v).srl32<28>(); } + __forceinline static GSVector4i x0000001f(const GSVector4i& v) { return xffffffff(v).srl32<27>(); } + __forceinline static GSVector4i x0000003f(const GSVector4i& v) { return xffffffff(v).srl32<26>(); } + __forceinline static GSVector4i x0000007f(const GSVector4i& v) { return xffffffff(v).srl32<25>(); } + __forceinline static GSVector4i x000000ff(const GSVector4i& v) { return xffffffff(v).srl32<24>(); } + __forceinline static GSVector4i x000001ff(const GSVector4i& v) { return xffffffff(v).srl32<23>(); } + __forceinline static GSVector4i x000003ff(const GSVector4i& v) { return xffffffff(v).srl32<22>(); } + __forceinline static GSVector4i x000007ff(const GSVector4i& v) { return xffffffff(v).srl32<21>(); } + __forceinline static GSVector4i x00000fff(const GSVector4i& v) { return xffffffff(v).srl32<20>(); } + __forceinline static GSVector4i x00001fff(const GSVector4i& v) { return xffffffff(v).srl32<19>(); } + __forceinline static GSVector4i 
x00003fff(const GSVector4i& v) { return xffffffff(v).srl32<18>(); } + __forceinline static GSVector4i x00007fff(const GSVector4i& v) { return xffffffff(v).srl32<17>(); } + __forceinline static GSVector4i x0000ffff(const GSVector4i& v) { return xffffffff(v).srl32<16>(); } + __forceinline static GSVector4i x0001ffff(const GSVector4i& v) { return xffffffff(v).srl32<15>(); } + __forceinline static GSVector4i x0003ffff(const GSVector4i& v) { return xffffffff(v).srl32<14>(); } + __forceinline static GSVector4i x0007ffff(const GSVector4i& v) { return xffffffff(v).srl32<13>(); } + __forceinline static GSVector4i x000fffff(const GSVector4i& v) { return xffffffff(v).srl32<12>(); } + __forceinline static GSVector4i x001fffff(const GSVector4i& v) { return xffffffff(v).srl32<11>(); } + __forceinline static GSVector4i x003fffff(const GSVector4i& v) { return xffffffff(v).srl32<10>(); } + __forceinline static GSVector4i x007fffff(const GSVector4i& v) { return xffffffff(v).srl32< 9>(); } + __forceinline static GSVector4i x00ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 8>(); } + __forceinline static GSVector4i x01ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 7>(); } + __forceinline static GSVector4i x03ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 6>(); } + __forceinline static GSVector4i x07ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 5>(); } + __forceinline static GSVector4i x0fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 4>(); } + __forceinline static GSVector4i x1fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 3>(); } + __forceinline static GSVector4i x3fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 2>(); } + __forceinline static GSVector4i x7fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 1>(); } + + __forceinline static GSVector4i x80000000(const GSVector4i& v) { return xffffffff(v).sll32<31>(); } + __forceinline static GSVector4i xc0000000(const GSVector4i& v) { return 
xffffffff(v).sll32<30>(); } + __forceinline static GSVector4i xe0000000(const GSVector4i& v) { return xffffffff(v).sll32<29>(); } + __forceinline static GSVector4i xf0000000(const GSVector4i& v) { return xffffffff(v).sll32<28>(); } + __forceinline static GSVector4i xf8000000(const GSVector4i& v) { return xffffffff(v).sll32<27>(); } + __forceinline static GSVector4i xfc000000(const GSVector4i& v) { return xffffffff(v).sll32<26>(); } + __forceinline static GSVector4i xfe000000(const GSVector4i& v) { return xffffffff(v).sll32<25>(); } + __forceinline static GSVector4i xff000000(const GSVector4i& v) { return xffffffff(v).sll32<24>(); } + __forceinline static GSVector4i xff800000(const GSVector4i& v) { return xffffffff(v).sll32<23>(); } + __forceinline static GSVector4i xffc00000(const GSVector4i& v) { return xffffffff(v).sll32<22>(); } + __forceinline static GSVector4i xffe00000(const GSVector4i& v) { return xffffffff(v).sll32<21>(); } + __forceinline static GSVector4i xfff00000(const GSVector4i& v) { return xffffffff(v).sll32<20>(); } + __forceinline static GSVector4i xfff80000(const GSVector4i& v) { return xffffffff(v).sll32<19>(); } + __forceinline static GSVector4i xfffc0000(const GSVector4i& v) { return xffffffff(v).sll32<18>(); } + __forceinline static GSVector4i xfffe0000(const GSVector4i& v) { return xffffffff(v).sll32<17>(); } + __forceinline static GSVector4i xffff0000(const GSVector4i& v) { return xffffffff(v).sll32<16>(); } + __forceinline static GSVector4i xffff8000(const GSVector4i& v) { return xffffffff(v).sll32<15>(); } + __forceinline static GSVector4i xffffc000(const GSVector4i& v) { return xffffffff(v).sll32<14>(); } + __forceinline static GSVector4i xffffe000(const GSVector4i& v) { return xffffffff(v).sll32<13>(); } + __forceinline static GSVector4i xfffff000(const GSVector4i& v) { return xffffffff(v).sll32<12>(); } + __forceinline static GSVector4i xfffff800(const GSVector4i& v) { return xffffffff(v).sll32<11>(); } + __forceinline static GSVector4i 
xfffffc00(const GSVector4i& v) { return xffffffff(v).sll32<10>(); } + __forceinline static GSVector4i xfffffe00(const GSVector4i& v) { return xffffffff(v).sll32< 9>(); } + __forceinline static GSVector4i xffffff00(const GSVector4i& v) { return xffffffff(v).sll32< 8>(); } + __forceinline static GSVector4i xffffff80(const GSVector4i& v) { return xffffffff(v).sll32< 7>(); } + __forceinline static GSVector4i xffffffc0(const GSVector4i& v) { return xffffffff(v).sll32< 6>(); } + __forceinline static GSVector4i xffffffe0(const GSVector4i& v) { return xffffffff(v).sll32< 5>(); } + __forceinline static GSVector4i xfffffff0(const GSVector4i& v) { return xffffffff(v).sll32< 4>(); } + __forceinline static GSVector4i xfffffff8(const GSVector4i& v) { return xffffffff(v).sll32< 3>(); } + __forceinline static GSVector4i xfffffffc(const GSVector4i& v) { return xffffffff(v).sll32< 2>(); } + __forceinline static GSVector4i xfffffffe(const GSVector4i& v) { return xffffffff(v).sll32< 1>(); } + + __forceinline static GSVector4i x0001(const GSVector4i& v) { return xffffffff(v).srl16<15>(); } + __forceinline static GSVector4i x0003(const GSVector4i& v) { return xffffffff(v).srl16<14>(); } + __forceinline static GSVector4i x0007(const GSVector4i& v) { return xffffffff(v).srl16<13>(); } + __forceinline static GSVector4i x000f(const GSVector4i& v) { return xffffffff(v).srl16<12>(); } + __forceinline static GSVector4i x001f(const GSVector4i& v) { return xffffffff(v).srl16<11>(); } + __forceinline static GSVector4i x003f(const GSVector4i& v) { return xffffffff(v).srl16<10>(); } + __forceinline static GSVector4i x007f(const GSVector4i& v) { return xffffffff(v).srl16< 9>(); } + __forceinline static GSVector4i x00ff(const GSVector4i& v) { return xffffffff(v).srl16< 8>(); } + __forceinline static GSVector4i x01ff(const GSVector4i& v) { return xffffffff(v).srl16< 7>(); } + __forceinline static GSVector4i x03ff(const GSVector4i& v) { return xffffffff(v).srl16< 6>(); } + __forceinline static 
GSVector4i x07ff(const GSVector4i& v) { return xffffffff(v).srl16< 5>(); } + __forceinline static GSVector4i x0fff(const GSVector4i& v) { return xffffffff(v).srl16< 4>(); } + __forceinline static GSVector4i x1fff(const GSVector4i& v) { return xffffffff(v).srl16< 3>(); } + __forceinline static GSVector4i x3fff(const GSVector4i& v) { return xffffffff(v).srl16< 2>(); } + __forceinline static GSVector4i x7fff(const GSVector4i& v) { return xffffffff(v).srl16< 1>(); } + + __forceinline static GSVector4i x8000(const GSVector4i& v) { return xffffffff(v).sll16<15>(); } + __forceinline static GSVector4i xc000(const GSVector4i& v) { return xffffffff(v).sll16<14>(); } + __forceinline static GSVector4i xe000(const GSVector4i& v) { return xffffffff(v).sll16<13>(); } + __forceinline static GSVector4i xf000(const GSVector4i& v) { return xffffffff(v).sll16<12>(); } + __forceinline static GSVector4i xf800(const GSVector4i& v) { return xffffffff(v).sll16<11>(); } + __forceinline static GSVector4i xfc00(const GSVector4i& v) { return xffffffff(v).sll16<10>(); } + __forceinline static GSVector4i xfe00(const GSVector4i& v) { return xffffffff(v).sll16< 9>(); } + __forceinline static GSVector4i xff00(const GSVector4i& v) { return xffffffff(v).sll16< 8>(); } + __forceinline static GSVector4i xff80(const GSVector4i& v) { return xffffffff(v).sll16< 7>(); } + __forceinline static GSVector4i xffc0(const GSVector4i& v) { return xffffffff(v).sll16< 6>(); } + __forceinline static GSVector4i xffe0(const GSVector4i& v) { return xffffffff(v).sll16< 5>(); } + __forceinline static GSVector4i xfff0(const GSVector4i& v) { return xffffffff(v).sll16< 4>(); } + __forceinline static GSVector4i xfff8(const GSVector4i& v) { return xffffffff(v).sll16< 3>(); } + __forceinline static GSVector4i xfffc(const GSVector4i& v) { return xffffffff(v).sll16< 2>(); } + __forceinline static GSVector4i xfffe(const GSVector4i& v) { return xffffffff(v).sll16< 1>(); } __forceinline static GSVector4i xff(int n) { return 
m_xff[n]; } __forceinline static GSVector4i x0f(int n) { return m_x0f[n]; } diff --git a/pcsx2/GS/GSVector8i.h b/pcsx2/GS/GSVector8i.h index 0bf4ceb3cc95e3..76ea41c4652cae 100644 --- a/pcsx2/GS/GSVector8i.h +++ b/pcsx2/GS/GSVector8i.h @@ -581,151 +581,84 @@ class alignas(32) GSVector8i //return GSVector8i(_mm256_slli_si128(m, i)); } - __forceinline GSVector8i sra16(int i) const + template <int i> + __forceinline GSVector8i sra16() const { return GSVector8i(_mm256_srai_epi16(m, i)); } - __forceinline GSVector8i sra16(__m128i i) const - { - return GSVector8i(_mm256_sra_epi16(m, i)); - } - - __forceinline GSVector8i sra16(__m256i i) const + __forceinline GSVector8i srav16(const GSVector8i& i) const { - return GSVector8i(_mm256_sra_epi16(m, _mm256_castsi256_si128(i))); + return GSVector8i(_mm256_srav_epi16(m, i.m)); } - __forceinline GSVector8i sra32(int i) const + template <int i> + __forceinline GSVector8i sra32() const { return GSVector8i(_mm256_srai_epi32(m, i)); } - __forceinline GSVector8i sra32(__m128i i) const + __forceinline GSVector8i srav32(const GSVector8i& i) const { - return GSVector8i(_mm256_sra_epi32(m, i)); + return GSVector8i(_mm256_srav_epi32(m, i.m)); } - __forceinline GSVector8i sra32(__m256i i) const - { - return GSVector8i(_mm256_sra_epi32(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i srav32(__m256i i) const - { - return GSVector8i(_mm256_srav_epi32(m, i)); - } - - __forceinline GSVector8i sll16(int i) const + template <int i> + __forceinline GSVector8i sll16() const { return GSVector8i(_mm256_slli_epi16(m, i)); } - __forceinline GSVector8i sll16(__m128i i) const - { - return GSVector8i(_mm256_sll_epi16(m, i)); - } - - __forceinline GSVector8i sll16(__m256i i) const + __forceinline GSVector8i sllv16(const GSVector8i& i) const { - return GSVector8i(_mm256_sll_epi16(m, _mm256_castsi256_si128(i))); + return GSVector8i(_mm256_sllv_epi16(m, i.m)); } - __forceinline GSVector8i sll32(int i) const + template <int i> + __forceinline GSVector8i sll32() const { return
GSVector8i(_mm256_slli_epi32(m, i)); } - __forceinline GSVector8i sll32(__m128i i) const + __forceinline GSVector8i sllv32(const GSVector8i& i) const { - return GSVector8i(_mm256_sll_epi32(m, i)); + return GSVector8i(_mm256_sllv_epi32(m, i.m)); } - __forceinline GSVector8i sll32(__m256i i) const - { - return GSVector8i(_mm256_sll_epi32(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i sllv32(__m256i i) const - { - return GSVector8i(_mm256_sllv_epi32(m, i)); - } - - __forceinline GSVector8i sll64(int i) const + template <int i> + __forceinline GSVector8i sll64() const { return GSVector8i(_mm256_slli_epi64(m, i)); } - __forceinline GSVector8i sll64(__m128i i) const - { - return GSVector8i(_mm256_sll_epi64(m, i)); - } - - __forceinline GSVector8i sll64(__m256i i) const + __forceinline GSVector8i sllv64(const GSVector8i& i) const { - return GSVector8i(_mm256_sll_epi64(m, _mm256_castsi256_si128(i))); + return GSVector8i(_mm256_sllv_epi64(m, i.m)); } - __forceinline GSVector8i sllv64(__m256i i) const - { - return GSVector8i(_mm256_sllv_epi64(m, i)); - } - - __forceinline GSVector8i srl16(int i) const + template <int i> + __forceinline GSVector8i srl16() const { return GSVector8i(_mm256_srli_epi16(m, i)); } - __forceinline GSVector8i srl16(__m128i i) const - { - return GSVector8i(_mm256_srl_epi16(m, i)); - } - - __forceinline GSVector8i srl16(__m256i i) const - { - return GSVector8i(_mm256_srl_epi16(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i srl32(int i) const + template <int i> + __forceinline GSVector8i srl32() const { return GSVector8i(_mm256_srli_epi32(m, i)); } - __forceinline GSVector8i srl32(__m128i i) const - { - return GSVector8i(_mm256_srl_epi32(m, i)); - } - - __forceinline GSVector8i srl32(__m256i i) const + __forceinline GSVector8i srlv32(const GSVector8i& i) const { - return GSVector8i(_mm256_srl_epi32(m, _mm256_castsi256_si128(i))); + return GSVector8i(_mm256_srlv_epi32(m, i.m)); } - __forceinline GSVector8i srlv32(__m256i i) const - { - return
GSVector8i(_mm256_srlv_epi32(m, i)); - } - - __forceinline GSVector8i srl64(int i) const + template <int i> + __forceinline GSVector8i srl64() const { return GSVector8i(_mm256_srli_epi64(m, i)); } - __forceinline GSVector8i srl64(__m128i i) const - { - return GSVector8i(_mm256_srl_epi64(m, i)); - } - - __forceinline GSVector8i srl64(__m256i i) const - { - return GSVector8i(_mm256_srl_epi64(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i srlv64(__m256i i) const - { - return GSVector8i(_mm256_srlv_epi64(m, i)); - } - __forceinline GSVector8i add8(const GSVector8i& v) const { return GSVector8i(_mm256_add_epi8(m, v.m)); @@ -864,7 +797,7 @@ class alignas(32) GSVector8i { // (a - this) * f >> 4 + this (a, this: 8-bit, f: 4-bit) - return add16(a.sub16(*this).mul16l(f).sra16(4)); + return add16(a.sub16(*this).mul16l(f).sra16<4>()); } template <int shift> @@ -877,7 +810,7 @@ class alignas(32) GSVector8i return mul16hrs(f); } - return sll16(shift + 1).mul16hs(f); + return sll16<shift + 1>().mul16hs(f); } __forceinline bool eq(const GSVector8i& v) const @@ -1676,199 +1609,199 @@ class alignas(32) GSVector8i __forceinline static GSVector8i xffffffff() { return zero() == zero(); } - __forceinline static GSVector8i x00000001() { return xffffffff().srl32(31); } - __forceinline static GSVector8i x00000003() { return xffffffff().srl32(30); } - __forceinline static GSVector8i x00000007() { return xffffffff().srl32(29); } - __forceinline static GSVector8i x0000000f() { return xffffffff().srl32(28); } - __forceinline static GSVector8i x0000001f() { return xffffffff().srl32(27); } - __forceinline static GSVector8i x0000003f() { return xffffffff().srl32(26); } - __forceinline static GSVector8i x0000007f() { return xffffffff().srl32(25); } - __forceinline static GSVector8i x000000ff() { return xffffffff().srl32(24); } - __forceinline static GSVector8i x000001ff() { return xffffffff().srl32(23); } - __forceinline static GSVector8i x000003ff() { return xffffffff().srl32(22); } - __forceinline static
GSVector8i x000007ff() { return xffffffff().srl32(21); } - __forceinline static GSVector8i x00000fff() { return xffffffff().srl32(20); } - __forceinline static GSVector8i x00001fff() { return xffffffff().srl32(19); } - __forceinline static GSVector8i x00003fff() { return xffffffff().srl32(18); } - __forceinline static GSVector8i x00007fff() { return xffffffff().srl32(17); } - __forceinline static GSVector8i x0000ffff() { return xffffffff().srl32(16); } - __forceinline static GSVector8i x0001ffff() { return xffffffff().srl32(15); } - __forceinline static GSVector8i x0003ffff() { return xffffffff().srl32(14); } - __forceinline static GSVector8i x0007ffff() { return xffffffff().srl32(13); } - __forceinline static GSVector8i x000fffff() { return xffffffff().srl32(12); } - __forceinline static GSVector8i x001fffff() { return xffffffff().srl32(11); } - __forceinline static GSVector8i x003fffff() { return xffffffff().srl32(10); } - __forceinline static GSVector8i x007fffff() { return xffffffff().srl32( 9); } - __forceinline static GSVector8i x00ffffff() { return xffffffff().srl32( 8); } - __forceinline static GSVector8i x01ffffff() { return xffffffff().srl32( 7); } - __forceinline static GSVector8i x03ffffff() { return xffffffff().srl32( 6); } - __forceinline static GSVector8i x07ffffff() { return xffffffff().srl32( 5); } - __forceinline static GSVector8i x0fffffff() { return xffffffff().srl32( 4); } - __forceinline static GSVector8i x1fffffff() { return xffffffff().srl32( 3); } - __forceinline static GSVector8i x3fffffff() { return xffffffff().srl32( 2); } - __forceinline static GSVector8i x7fffffff() { return xffffffff().srl32( 1); } - - __forceinline static GSVector8i x80000000() { return xffffffff().sll32(31); } - __forceinline static GSVector8i xc0000000() { return xffffffff().sll32(30); } - __forceinline static GSVector8i xe0000000() { return xffffffff().sll32(29); } - __forceinline static GSVector8i xf0000000() { return xffffffff().sll32(28); } - __forceinline 
static GSVector8i xf8000000() { return xffffffff().sll32(27); } - __forceinline static GSVector8i xfc000000() { return xffffffff().sll32(26); } - __forceinline static GSVector8i xfe000000() { return xffffffff().sll32(25); } - __forceinline static GSVector8i xff000000() { return xffffffff().sll32(24); } - __forceinline static GSVector8i xff800000() { return xffffffff().sll32(23); } - __forceinline static GSVector8i xffc00000() { return xffffffff().sll32(22); } - __forceinline static GSVector8i xffe00000() { return xffffffff().sll32(21); } - __forceinline static GSVector8i xfff00000() { return xffffffff().sll32(20); } - __forceinline static GSVector8i xfff80000() { return xffffffff().sll32(19); } - __forceinline static GSVector8i xfffc0000() { return xffffffff().sll32(18); } - __forceinline static GSVector8i xfffe0000() { return xffffffff().sll32(17); } - __forceinline static GSVector8i xffff0000() { return xffffffff().sll32(16); } - __forceinline static GSVector8i xffff8000() { return xffffffff().sll32(15); } - __forceinline static GSVector8i xffffc000() { return xffffffff().sll32(14); } - __forceinline static GSVector8i xffffe000() { return xffffffff().sll32(13); } - __forceinline static GSVector8i xfffff000() { return xffffffff().sll32(12); } - __forceinline static GSVector8i xfffff800() { return xffffffff().sll32(11); } - __forceinline static GSVector8i xfffffc00() { return xffffffff().sll32(10); } - __forceinline static GSVector8i xfffffe00() { return xffffffff().sll32( 9); } - __forceinline static GSVector8i xffffff00() { return xffffffff().sll32( 8); } - __forceinline static GSVector8i xffffff80() { return xffffffff().sll32( 7); } - __forceinline static GSVector8i xffffffc0() { return xffffffff().sll32( 6); } - __forceinline static GSVector8i xffffffe0() { return xffffffff().sll32( 5); } - __forceinline static GSVector8i xfffffff0() { return xffffffff().sll32( 4); } - __forceinline static GSVector8i xfffffff8() { return xffffffff().sll32( 3); } - __forceinline 
static GSVector8i xfffffffc() { return xffffffff().sll32( 2); } - __forceinline static GSVector8i xfffffffe() { return xffffffff().sll32( 1); } - - __forceinline static GSVector8i x0001() { return xffffffff().srl16(15); } - __forceinline static GSVector8i x0003() { return xffffffff().srl16(14); } - __forceinline static GSVector8i x0007() { return xffffffff().srl16(13); } - __forceinline static GSVector8i x000f() { return xffffffff().srl16(12); } - __forceinline static GSVector8i x001f() { return xffffffff().srl16(11); } - __forceinline static GSVector8i x003f() { return xffffffff().srl16(10); } - __forceinline static GSVector8i x007f() { return xffffffff().srl16( 9); } - __forceinline static GSVector8i x00ff() { return xffffffff().srl16( 8); } - __forceinline static GSVector8i x01ff() { return xffffffff().srl16( 7); } - __forceinline static GSVector8i x03ff() { return xffffffff().srl16( 6); } - __forceinline static GSVector8i x07ff() { return xffffffff().srl16( 5); } - __forceinline static GSVector8i x0fff() { return xffffffff().srl16( 4); } - __forceinline static GSVector8i x1fff() { return xffffffff().srl16( 3); } - __forceinline static GSVector8i x3fff() { return xffffffff().srl16( 2); } - __forceinline static GSVector8i x7fff() { return xffffffff().srl16( 1); } - - __forceinline static GSVector8i x8000() { return xffffffff().sll16(15); } - __forceinline static GSVector8i xc000() { return xffffffff().sll16(14); } - __forceinline static GSVector8i xe000() { return xffffffff().sll16(13); } - __forceinline static GSVector8i xf000() { return xffffffff().sll16(12); } - __forceinline static GSVector8i xf800() { return xffffffff().sll16(11); } - __forceinline static GSVector8i xfc00() { return xffffffff().sll16(10); } - __forceinline static GSVector8i xfe00() { return xffffffff().sll16( 9); } - __forceinline static GSVector8i xff00() { return xffffffff().sll16( 8); } - __forceinline static GSVector8i xff80() { return xffffffff().sll16( 7); } - __forceinline static 
GSVector8i xffc0() { return xffffffff().sll16( 6); } - __forceinline static GSVector8i xffe0() { return xffffffff().sll16( 5); } - __forceinline static GSVector8i xfff0() { return xffffffff().sll16( 4); } - __forceinline static GSVector8i xfff8() { return xffffffff().sll16( 3); } - __forceinline static GSVector8i xfffc() { return xffffffff().sll16( 2); } - __forceinline static GSVector8i xfffe() { return xffffffff().sll16( 1); } + __forceinline static GSVector8i x00000001() { return xffffffff().srl32<31>(); } + __forceinline static GSVector8i x00000003() { return xffffffff().srl32<30>(); } + __forceinline static GSVector8i x00000007() { return xffffffff().srl32<29>(); } + __forceinline static GSVector8i x0000000f() { return xffffffff().srl32<28>(); } + __forceinline static GSVector8i x0000001f() { return xffffffff().srl32<27>(); } + __forceinline static GSVector8i x0000003f() { return xffffffff().srl32<26>(); } + __forceinline static GSVector8i x0000007f() { return xffffffff().srl32<25>(); } + __forceinline static GSVector8i x000000ff() { return xffffffff().srl32<24>(); } + __forceinline static GSVector8i x000001ff() { return xffffffff().srl32<23>(); } + __forceinline static GSVector8i x000003ff() { return xffffffff().srl32<22>(); } + __forceinline static GSVector8i x000007ff() { return xffffffff().srl32<21>(); } + __forceinline static GSVector8i x00000fff() { return xffffffff().srl32<20>(); } + __forceinline static GSVector8i x00001fff() { return xffffffff().srl32<19>(); } + __forceinline static GSVector8i x00003fff() { return xffffffff().srl32<18>(); } + __forceinline static GSVector8i x00007fff() { return xffffffff().srl32<17>(); } + __forceinline static GSVector8i x0000ffff() { return xffffffff().srl32<16>(); } + __forceinline static GSVector8i x0001ffff() { return xffffffff().srl32<15>(); } + __forceinline static GSVector8i x0003ffff() { return xffffffff().srl32<14>(); } + __forceinline static GSVector8i x0007ffff() { return xffffffff().srl32<13>(); } + 
__forceinline static GSVector8i x000fffff() { return xffffffff().srl32<12>(); } + __forceinline static GSVector8i x001fffff() { return xffffffff().srl32<11>(); } + __forceinline static GSVector8i x003fffff() { return xffffffff().srl32<10>(); } + __forceinline static GSVector8i x007fffff() { return xffffffff().srl32< 9>(); } + __forceinline static GSVector8i x00ffffff() { return xffffffff().srl32< 8>(); } + __forceinline static GSVector8i x01ffffff() { return xffffffff().srl32< 7>(); } + __forceinline static GSVector8i x03ffffff() { return xffffffff().srl32< 6>(); } + __forceinline static GSVector8i x07ffffff() { return xffffffff().srl32< 5>(); } + __forceinline static GSVector8i x0fffffff() { return xffffffff().srl32< 4>(); } + __forceinline static GSVector8i x1fffffff() { return xffffffff().srl32< 3>(); } + __forceinline static GSVector8i x3fffffff() { return xffffffff().srl32< 2>(); } + __forceinline static GSVector8i x7fffffff() { return xffffffff().srl32< 1>(); } + + __forceinline static GSVector8i x80000000() { return xffffffff().sll32<31>(); } + __forceinline static GSVector8i xc0000000() { return xffffffff().sll32<30>(); } + __forceinline static GSVector8i xe0000000() { return xffffffff().sll32<29>(); } + __forceinline static GSVector8i xf0000000() { return xffffffff().sll32<28>(); } + __forceinline static GSVector8i xf8000000() { return xffffffff().sll32<27>(); } + __forceinline static GSVector8i xfc000000() { return xffffffff().sll32<26>(); } + __forceinline static GSVector8i xfe000000() { return xffffffff().sll32<25>(); } + __forceinline static GSVector8i xff000000() { return xffffffff().sll32<24>(); } + __forceinline static GSVector8i xff800000() { return xffffffff().sll32<23>(); } + __forceinline static GSVector8i xffc00000() { return xffffffff().sll32<22>(); } + __forceinline static GSVector8i xffe00000() { return xffffffff().sll32<21>(); } + __forceinline static GSVector8i xfff00000() { return xffffffff().sll32<20>(); } + __forceinline static 
GSVector8i xfff80000() { return xffffffff().sll32<19>(); } + __forceinline static GSVector8i xfffc0000() { return xffffffff().sll32<18>(); } + __forceinline static GSVector8i xfffe0000() { return xffffffff().sll32<17>(); } + __forceinline static GSVector8i xffff0000() { return xffffffff().sll32<16>(); } + __forceinline static GSVector8i xffff8000() { return xffffffff().sll32<15>(); } + __forceinline static GSVector8i xffffc000() { return xffffffff().sll32<14>(); } + __forceinline static GSVector8i xffffe000() { return xffffffff().sll32<13>(); } + __forceinline static GSVector8i xfffff000() { return xffffffff().sll32<12>(); } + __forceinline static GSVector8i xfffff800() { return xffffffff().sll32<11>(); } + __forceinline static GSVector8i xfffffc00() { return xffffffff().sll32<10>(); } + __forceinline static GSVector8i xfffffe00() { return xffffffff().sll32< 9>(); } + __forceinline static GSVector8i xffffff00() { return xffffffff().sll32< 8>(); } + __forceinline static GSVector8i xffffff80() { return xffffffff().sll32< 7>(); } + __forceinline static GSVector8i xffffffc0() { return xffffffff().sll32< 6>(); } + __forceinline static GSVector8i xffffffe0() { return xffffffff().sll32< 5>(); } + __forceinline static GSVector8i xfffffff0() { return xffffffff().sll32< 4>(); } + __forceinline static GSVector8i xfffffff8() { return xffffffff().sll32< 3>(); } + __forceinline static GSVector8i xfffffffc() { return xffffffff().sll32< 2>(); } + __forceinline static GSVector8i xfffffffe() { return xffffffff().sll32< 1>(); } + + __forceinline static GSVector8i x0001() { return xffffffff().srl16<15>(); } + __forceinline static GSVector8i x0003() { return xffffffff().srl16<14>(); } + __forceinline static GSVector8i x0007() { return xffffffff().srl16<13>(); } + __forceinline static GSVector8i x000f() { return xffffffff().srl16<12>(); } + __forceinline static GSVector8i x001f() { return xffffffff().srl16<11>(); } + __forceinline static GSVector8i x003f() { return 
xffffffff().srl16<10>(); } + __forceinline static GSVector8i x007f() { return xffffffff().srl16< 9>(); } + __forceinline static GSVector8i x00ff() { return xffffffff().srl16< 8>(); } + __forceinline static GSVector8i x01ff() { return xffffffff().srl16< 7>(); } + __forceinline static GSVector8i x03ff() { return xffffffff().srl16< 6>(); } + __forceinline static GSVector8i x07ff() { return xffffffff().srl16< 5>(); } + __forceinline static GSVector8i x0fff() { return xffffffff().srl16< 4>(); } + __forceinline static GSVector8i x1fff() { return xffffffff().srl16< 3>(); } + __forceinline static GSVector8i x3fff() { return xffffffff().srl16< 2>(); } + __forceinline static GSVector8i x7fff() { return xffffffff().srl16< 1>(); } + + __forceinline static GSVector8i x8000() { return xffffffff().sll16<15>(); } + __forceinline static GSVector8i xc000() { return xffffffff().sll16<14>(); } + __forceinline static GSVector8i xe000() { return xffffffff().sll16<13>(); } + __forceinline static GSVector8i xf000() { return xffffffff().sll16<12>(); } + __forceinline static GSVector8i xf800() { return xffffffff().sll16<11>(); } + __forceinline static GSVector8i xfc00() { return xffffffff().sll16<10>(); } + __forceinline static GSVector8i xfe00() { return xffffffff().sll16< 9>(); } + __forceinline static GSVector8i xff00() { return xffffffff().sll16< 8>(); } + __forceinline static GSVector8i xff80() { return xffffffff().sll16< 7>(); } + __forceinline static GSVector8i xffc0() { return xffffffff().sll16< 6>(); } + __forceinline static GSVector8i xffe0() { return xffffffff().sll16< 5>(); } + __forceinline static GSVector8i xfff0() { return xffffffff().sll16< 4>(); } + __forceinline static GSVector8i xfff8() { return xffffffff().sll16< 3>(); } + __forceinline static GSVector8i xfffc() { return xffffffff().sll16< 2>(); } + __forceinline static GSVector8i xfffe() { return xffffffff().sll16< 1>(); } __forceinline static GSVector8i xffffffff(const GSVector8i& v) { return v == v; } - __forceinline 
static GSVector8i x00000001(const GSVector8i& v) { return xffffffff(v).srl32(31); } - __forceinline static GSVector8i x00000003(const GSVector8i& v) { return xffffffff(v).srl32(30); } - __forceinline static GSVector8i x00000007(const GSVector8i& v) { return xffffffff(v).srl32(29); } - __forceinline static GSVector8i x0000000f(const GSVector8i& v) { return xffffffff(v).srl32(28); } - __forceinline static GSVector8i x0000001f(const GSVector8i& v) { return xffffffff(v).srl32(27); } - __forceinline static GSVector8i x0000003f(const GSVector8i& v) { return xffffffff(v).srl32(26); } - __forceinline static GSVector8i x0000007f(const GSVector8i& v) { return xffffffff(v).srl32(25); } - __forceinline static GSVector8i x000000ff(const GSVector8i& v) { return xffffffff(v).srl32(24); } - __forceinline static GSVector8i x000001ff(const GSVector8i& v) { return xffffffff(v).srl32(23); } - __forceinline static GSVector8i x000003ff(const GSVector8i& v) { return xffffffff(v).srl32(22); } - __forceinline static GSVector8i x000007ff(const GSVector8i& v) { return xffffffff(v).srl32(21); } - __forceinline static GSVector8i x00000fff(const GSVector8i& v) { return xffffffff(v).srl32(20); } - __forceinline static GSVector8i x00001fff(const GSVector8i& v) { return xffffffff(v).srl32(19); } - __forceinline static GSVector8i x00003fff(const GSVector8i& v) { return xffffffff(v).srl32(18); } - __forceinline static GSVector8i x00007fff(const GSVector8i& v) { return xffffffff(v).srl32(17); } - __forceinline static GSVector8i x0000ffff(const GSVector8i& v) { return xffffffff(v).srl32(16); } - __forceinline static GSVector8i x0001ffff(const GSVector8i& v) { return xffffffff(v).srl32(15); } - __forceinline static GSVector8i x0003ffff(const GSVector8i& v) { return xffffffff(v).srl32(14); } - __forceinline static GSVector8i x0007ffff(const GSVector8i& v) { return xffffffff(v).srl32(13); } - __forceinline static GSVector8i x000fffff(const GSVector8i& v) { return xffffffff(v).srl32(12); } - __forceinline 
static GSVector8i x001fffff(const GSVector8i& v) { return xffffffff(v).srl32(11); } - __forceinline static GSVector8i x003fffff(const GSVector8i& v) { return xffffffff(v).srl32(10); } - __forceinline static GSVector8i x007fffff(const GSVector8i& v) { return xffffffff(v).srl32( 9); } - __forceinline static GSVector8i x00ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 8); } - __forceinline static GSVector8i x01ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 7); } - __forceinline static GSVector8i x03ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 6); } - __forceinline static GSVector8i x07ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 5); } - __forceinline static GSVector8i x0fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 4); } - __forceinline static GSVector8i x1fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 3); } - __forceinline static GSVector8i x3fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 2); } - __forceinline static GSVector8i x7fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 1); } - - __forceinline static GSVector8i x80000000(const GSVector8i& v) { return xffffffff(v).sll32(31); } - __forceinline static GSVector8i xc0000000(const GSVector8i& v) { return xffffffff(v).sll32(30); } - __forceinline static GSVector8i xe0000000(const GSVector8i& v) { return xffffffff(v).sll32(29); } - __forceinline static GSVector8i xf0000000(const GSVector8i& v) { return xffffffff(v).sll32(28); } - __forceinline static GSVector8i xf8000000(const GSVector8i& v) { return xffffffff(v).sll32(27); } - __forceinline static GSVector8i xfc000000(const GSVector8i& v) { return xffffffff(v).sll32(26); } - __forceinline static GSVector8i xfe000000(const GSVector8i& v) { return xffffffff(v).sll32(25); } - __forceinline static GSVector8i xff000000(const GSVector8i& v) { return xffffffff(v).sll32(24); } - __forceinline static GSVector8i xff800000(const GSVector8i& v) { return xffffffff(v).sll32(23); } - 
__forceinline static GSVector8i xffc00000(const GSVector8i& v) { return xffffffff(v).sll32(22); } - __forceinline static GSVector8i xffe00000(const GSVector8i& v) { return xffffffff(v).sll32(21); } - __forceinline static GSVector8i xfff00000(const GSVector8i& v) { return xffffffff(v).sll32(20); } - __forceinline static GSVector8i xfff80000(const GSVector8i& v) { return xffffffff(v).sll32(19); } - __forceinline static GSVector8i xfffc0000(const GSVector8i& v) { return xffffffff(v).sll32(18); } - __forceinline static GSVector8i xfffe0000(const GSVector8i& v) { return xffffffff(v).sll32(17); } - __forceinline static GSVector8i xffff0000(const GSVector8i& v) { return xffffffff(v).sll32(16); } - __forceinline static GSVector8i xffff8000(const GSVector8i& v) { return xffffffff(v).sll32(15); } - __forceinline static GSVector8i xffffc000(const GSVector8i& v) { return xffffffff(v).sll32(14); } - __forceinline static GSVector8i xffffe000(const GSVector8i& v) { return xffffffff(v).sll32(13); } - __forceinline static GSVector8i xfffff000(const GSVector8i& v) { return xffffffff(v).sll32(12); } - __forceinline static GSVector8i xfffff800(const GSVector8i& v) { return xffffffff(v).sll32(11); } - __forceinline static GSVector8i xfffffc00(const GSVector8i& v) { return xffffffff(v).sll32(10); } - __forceinline static GSVector8i xfffffe00(const GSVector8i& v) { return xffffffff(v).sll32( 9); } - __forceinline static GSVector8i xffffff00(const GSVector8i& v) { return xffffffff(v).sll32( 8); } - __forceinline static GSVector8i xffffff80(const GSVector8i& v) { return xffffffff(v).sll32( 7); } - __forceinline static GSVector8i xffffffc0(const GSVector8i& v) { return xffffffff(v).sll32( 6); } - __forceinline static GSVector8i xffffffe0(const GSVector8i& v) { return xffffffff(v).sll32( 5); } - __forceinline static GSVector8i xfffffff0(const GSVector8i& v) { return xffffffff(v).sll32( 4); } - __forceinline static GSVector8i xfffffff8(const GSVector8i& v) { return xffffffff(v).sll32( 3); } - 
__forceinline static GSVector8i xfffffffc(const GSVector8i& v) { return xffffffff(v).sll32( 2); } - __forceinline static GSVector8i xfffffffe(const GSVector8i& v) { return xffffffff(v).sll32( 1); } - - __forceinline static GSVector8i x0001(const GSVector8i& v) { return xffffffff(v).srl16(15); } - __forceinline static GSVector8i x0003(const GSVector8i& v) { return xffffffff(v).srl16(14); } - __forceinline static GSVector8i x0007(const GSVector8i& v) { return xffffffff(v).srl16(13); } - __forceinline static GSVector8i x000f(const GSVector8i& v) { return xffffffff(v).srl16(12); } - __forceinline static GSVector8i x001f(const GSVector8i& v) { return xffffffff(v).srl16(11); } - __forceinline static GSVector8i x003f(const GSVector8i& v) { return xffffffff(v).srl16(10); } - __forceinline static GSVector8i x007f(const GSVector8i& v) { return xffffffff(v).srl16( 9); } - __forceinline static GSVector8i x00ff(const GSVector8i& v) { return xffffffff(v).srl16( 8); } - __forceinline static GSVector8i x01ff(const GSVector8i& v) { return xffffffff(v).srl16( 7); } - __forceinline static GSVector8i x03ff(const GSVector8i& v) { return xffffffff(v).srl16( 6); } - __forceinline static GSVector8i x07ff(const GSVector8i& v) { return xffffffff(v).srl16( 5); } - __forceinline static GSVector8i x0fff(const GSVector8i& v) { return xffffffff(v).srl16( 4); } - __forceinline static GSVector8i x1fff(const GSVector8i& v) { return xffffffff(v).srl16( 3); } - __forceinline static GSVector8i x3fff(const GSVector8i& v) { return xffffffff(v).srl16( 2); } - __forceinline static GSVector8i x7fff(const GSVector8i& v) { return xffffffff(v).srl16( 1); } - - __forceinline static GSVector8i x8000(const GSVector8i& v) { return xffffffff(v).sll16(15); } - __forceinline static GSVector8i xc000(const GSVector8i& v) { return xffffffff(v).sll16(14); } - __forceinline static GSVector8i xe000(const GSVector8i& v) { return xffffffff(v).sll16(13); } - __forceinline static GSVector8i xf000(const GSVector8i& v) { return 
xffffffff(v).sll16(12); } - __forceinline static GSVector8i xf800(const GSVector8i& v) { return xffffffff(v).sll16(11); } - __forceinline static GSVector8i xfc00(const GSVector8i& v) { return xffffffff(v).sll16(10); } - __forceinline static GSVector8i xfe00(const GSVector8i& v) { return xffffffff(v).sll16( 9); } - __forceinline static GSVector8i xff00(const GSVector8i& v) { return xffffffff(v).sll16( 8); } - __forceinline static GSVector8i xff80(const GSVector8i& v) { return xffffffff(v).sll16( 7); } - __forceinline static GSVector8i xffc0(const GSVector8i& v) { return xffffffff(v).sll16( 6); } - __forceinline static GSVector8i xffe0(const GSVector8i& v) { return xffffffff(v).sll16( 5); } - __forceinline static GSVector8i xfff0(const GSVector8i& v) { return xffffffff(v).sll16( 4); } - __forceinline static GSVector8i xfff8(const GSVector8i& v) { return xffffffff(v).sll16( 3); } - __forceinline static GSVector8i xfffc(const GSVector8i& v) { return xffffffff(v).sll16( 2); } - __forceinline static GSVector8i xfffe(const GSVector8i& v) { return xffffffff(v).sll16( 1); } + __forceinline static GSVector8i x00000001(const GSVector8i& v) { return xffffffff(v).srl32<31>(); } + __forceinline static GSVector8i x00000003(const GSVector8i& v) { return xffffffff(v).srl32<30>(); } + __forceinline static GSVector8i x00000007(const GSVector8i& v) { return xffffffff(v).srl32<29>(); } + __forceinline static GSVector8i x0000000f(const GSVector8i& v) { return xffffffff(v).srl32<28>(); } + __forceinline static GSVector8i x0000001f(const GSVector8i& v) { return xffffffff(v).srl32<27>(); } + __forceinline static GSVector8i x0000003f(const GSVector8i& v) { return xffffffff(v).srl32<26>(); } + __forceinline static GSVector8i x0000007f(const GSVector8i& v) { return xffffffff(v).srl32<25>(); } + __forceinline static GSVector8i x000000ff(const GSVector8i& v) { return xffffffff(v).srl32<24>(); } + __forceinline static GSVector8i x000001ff(const GSVector8i& v) { return xffffffff(v).srl32<23>(); } 
+ __forceinline static GSVector8i x000003ff(const GSVector8i& v) { return xffffffff(v).srl32<22>(); } + __forceinline static GSVector8i x000007ff(const GSVector8i& v) { return xffffffff(v).srl32<21>(); } + __forceinline static GSVector8i x00000fff(const GSVector8i& v) { return xffffffff(v).srl32<20>(); } + __forceinline static GSVector8i x00001fff(const GSVector8i& v) { return xffffffff(v).srl32<19>(); } + __forceinline static GSVector8i x00003fff(const GSVector8i& v) { return xffffffff(v).srl32<18>(); } + __forceinline static GSVector8i x00007fff(const GSVector8i& v) { return xffffffff(v).srl32<17>(); } + __forceinline static GSVector8i x0000ffff(const GSVector8i& v) { return xffffffff(v).srl32<16>(); } + __forceinline static GSVector8i x0001ffff(const GSVector8i& v) { return xffffffff(v).srl32<15>(); } + __forceinline static GSVector8i x0003ffff(const GSVector8i& v) { return xffffffff(v).srl32<14>(); } + __forceinline static GSVector8i x0007ffff(const GSVector8i& v) { return xffffffff(v).srl32<13>(); } + __forceinline static GSVector8i x000fffff(const GSVector8i& v) { return xffffffff(v).srl32<12>(); } + __forceinline static GSVector8i x001fffff(const GSVector8i& v) { return xffffffff(v).srl32<11>(); } + __forceinline static GSVector8i x003fffff(const GSVector8i& v) { return xffffffff(v).srl32<10>(); } + __forceinline static GSVector8i x007fffff(const GSVector8i& v) { return xffffffff(v).srl32< 9>(); } + __forceinline static GSVector8i x00ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 8>(); } + __forceinline static GSVector8i x01ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 7>(); } + __forceinline static GSVector8i x03ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 6>(); } + __forceinline static GSVector8i x07ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 5>(); } + __forceinline static GSVector8i x0fffffff(const GSVector8i& v) { return xffffffff(v).srl32< 4>(); } + __forceinline static GSVector8i x1fffffff(const GSVector8i& 
v) { return xffffffff(v).srl32< 3>(); } + __forceinline static GSVector8i x3fffffff(const GSVector8i& v) { return xffffffff(v).srl32< 2>(); } + __forceinline static GSVector8i x7fffffff(const GSVector8i& v) { return xffffffff(v).srl32< 1>(); } + + __forceinline static GSVector8i x80000000(const GSVector8i& v) { return xffffffff(v).sll32<31>(); } + __forceinline static GSVector8i xc0000000(const GSVector8i& v) { return xffffffff(v).sll32<30>(); } + __forceinline static GSVector8i xe0000000(const GSVector8i& v) { return xffffffff(v).sll32<29>(); } + __forceinline static GSVector8i xf0000000(const GSVector8i& v) { return xffffffff(v).sll32<28>(); } + __forceinline static GSVector8i xf8000000(const GSVector8i& v) { return xffffffff(v).sll32<27>(); } + __forceinline static GSVector8i xfc000000(const GSVector8i& v) { return xffffffff(v).sll32<26>(); } + __forceinline static GSVector8i xfe000000(const GSVector8i& v) { return xffffffff(v).sll32<25>(); } + __forceinline static GSVector8i xff000000(const GSVector8i& v) { return xffffffff(v).sll32<24>(); } + __forceinline static GSVector8i xff800000(const GSVector8i& v) { return xffffffff(v).sll32<23>(); } + __forceinline static GSVector8i xffc00000(const GSVector8i& v) { return xffffffff(v).sll32<22>(); } + __forceinline static GSVector8i xffe00000(const GSVector8i& v) { return xffffffff(v).sll32<21>(); } + __forceinline static GSVector8i xfff00000(const GSVector8i& v) { return xffffffff(v).sll32<20>(); } + __forceinline static GSVector8i xfff80000(const GSVector8i& v) { return xffffffff(v).sll32<19>(); } + __forceinline static GSVector8i xfffc0000(const GSVector8i& v) { return xffffffff(v).sll32<18>(); } + __forceinline static GSVector8i xfffe0000(const GSVector8i& v) { return xffffffff(v).sll32<17>(); } + __forceinline static GSVector8i xffff0000(const GSVector8i& v) { return xffffffff(v).sll32<16>(); } + __forceinline static GSVector8i xffff8000(const GSVector8i& v) { return xffffffff(v).sll32<15>(); } + __forceinline 
static GSVector8i xffffc000(const GSVector8i& v) { return xffffffff(v).sll32<14>(); } + __forceinline static GSVector8i xffffe000(const GSVector8i& v) { return xffffffff(v).sll32<13>(); } + __forceinline static GSVector8i xfffff000(const GSVector8i& v) { return xffffffff(v).sll32<12>(); } + __forceinline static GSVector8i xfffff800(const GSVector8i& v) { return xffffffff(v).sll32<11>(); } + __forceinline static GSVector8i xfffffc00(const GSVector8i& v) { return xffffffff(v).sll32<10>(); } + __forceinline static GSVector8i xfffffe00(const GSVector8i& v) { return xffffffff(v).sll32< 9>(); } + __forceinline static GSVector8i xffffff00(const GSVector8i& v) { return xffffffff(v).sll32< 8>(); } + __forceinline static GSVector8i xffffff80(const GSVector8i& v) { return xffffffff(v).sll32< 7>(); } + __forceinline static GSVector8i xffffffc0(const GSVector8i& v) { return xffffffff(v).sll32< 6>(); } + __forceinline static GSVector8i xffffffe0(const GSVector8i& v) { return xffffffff(v).sll32< 5>(); } + __forceinline static GSVector8i xfffffff0(const GSVector8i& v) { return xffffffff(v).sll32< 4>(); } + __forceinline static GSVector8i xfffffff8(const GSVector8i& v) { return xffffffff(v).sll32< 3>(); } + __forceinline static GSVector8i xfffffffc(const GSVector8i& v) { return xffffffff(v).sll32< 2>(); } + __forceinline static GSVector8i xfffffffe(const GSVector8i& v) { return xffffffff(v).sll32< 1>(); } + + __forceinline static GSVector8i x0001(const GSVector8i& v) { return xffffffff(v).srl16<15>(); } + __forceinline static GSVector8i x0003(const GSVector8i& v) { return xffffffff(v).srl16<14>(); } + __forceinline static GSVector8i x0007(const GSVector8i& v) { return xffffffff(v).srl16<13>(); } + __forceinline static GSVector8i x000f(const GSVector8i& v) { return xffffffff(v).srl16<12>(); } + __forceinline static GSVector8i x001f(const GSVector8i& v) { return xffffffff(v).srl16<11>(); } + __forceinline static GSVector8i x003f(const GSVector8i& v) { return xffffffff(v).srl16<10>(); 
} + __forceinline static GSVector8i x007f(const GSVector8i& v) { return xffffffff(v).srl16< 9>(); } + __forceinline static GSVector8i x00ff(const GSVector8i& v) { return xffffffff(v).srl16< 8>(); } + __forceinline static GSVector8i x01ff(const GSVector8i& v) { return xffffffff(v).srl16< 7>(); } + __forceinline static GSVector8i x03ff(const GSVector8i& v) { return xffffffff(v).srl16< 6>(); } + __forceinline static GSVector8i x07ff(const GSVector8i& v) { return xffffffff(v).srl16< 5>(); } + __forceinline static GSVector8i x0fff(const GSVector8i& v) { return xffffffff(v).srl16< 4>(); } + __forceinline static GSVector8i x1fff(const GSVector8i& v) { return xffffffff(v).srl16< 3>(); } + __forceinline static GSVector8i x3fff(const GSVector8i& v) { return xffffffff(v).srl16< 2>(); } + __forceinline static GSVector8i x7fff(const GSVector8i& v) { return xffffffff(v).srl16< 1>(); } + + __forceinline static GSVector8i x8000(const GSVector8i& v) { return xffffffff(v).sll16<15>(); } + __forceinline static GSVector8i xc000(const GSVector8i& v) { return xffffffff(v).sll16<14>(); } + __forceinline static GSVector8i xe000(const GSVector8i& v) { return xffffffff(v).sll16<13>(); } + __forceinline static GSVector8i xf000(const GSVector8i& v) { return xffffffff(v).sll16<12>(); } + __forceinline static GSVector8i xf800(const GSVector8i& v) { return xffffffff(v).sll16<11>(); } + __forceinline static GSVector8i xfc00(const GSVector8i& v) { return xffffffff(v).sll16<10>(); } + __forceinline static GSVector8i xfe00(const GSVector8i& v) { return xffffffff(v).sll16< 9>(); } + __forceinline static GSVector8i xff00(const GSVector8i& v) { return xffffffff(v).sll16< 8>(); } + __forceinline static GSVector8i xff80(const GSVector8i& v) { return xffffffff(v).sll16< 7>(); } + __forceinline static GSVector8i xffc0(const GSVector8i& v) { return xffffffff(v).sll16< 6>(); } + __forceinline static GSVector8i xffe0(const GSVector8i& v) { return xffffffff(v).sll16< 5>(); } + __forceinline static GSVector8i 
xfff0(const GSVector8i& v) { return xffffffff(v).sll16< 4>(); } + __forceinline static GSVector8i xfff8(const GSVector8i& v) { return xffffffff(v).sll16< 3>(); } + __forceinline static GSVector8i xfffc(const GSVector8i& v) { return xffffffff(v).sll16< 2>(); } + __forceinline static GSVector8i xfffe(const GSVector8i& v) { return xffffffff(v).sll16< 1>(); } __forceinline static GSVector8i xff(int n) { return m_xff[n]; } __forceinline static GSVector8i x0f(int n) { return m_x0f[n]; } diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 7b3e78026e609a..08549831cc0fc5 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -320,7 +320,7 @@ void GSRendererHW::ExpandLineIndices() read -= 1; write -= expansion_factor; - const GSVector4i in = read->sll16(2); + const GSVector4i in = read->sll16<2>(); write[0] = in.shuffle8(mask0) | low0; write[1] = in.shuffle8(mask1) | low1; write[2] = in.shuffle8(mask2) | low2; @@ -373,7 +373,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS static_cast(m_vt.m_min.p.x), static_cast(m_vt.m_min.p.y), static_cast(m_vt.m_min.p.z), static_cast(m_vt.m_min.p.w), r.x, r.y, r.z, r.w); - const GSVector4i fpr = r.sll32(4); + const GSVector4i fpr = r.sll32<4>(); v[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + fpr.x); v[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + fpr.y); @@ -487,7 +487,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS const GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V); - tmp = GSVector4i(tmp - offset).srl32(1) + offset; + tmp = GSVector4i(tmp - offset).srl32<1>() + offset; v[i].XYZ.Y = static_cast(tmp.x); v[i + 1].XYZ.Y = static_cast(tmp.z); @@ -525,7 +525,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS const GSVector4i offset(o.OFY, o.OFY); GSVector4i 
tmp(v[i].XYZ.Y, v[i + 1].XYZ.Y); - tmp = GSVector4i(tmp - offset).srl32(1) + offset; + tmp = GSVector4i(tmp - offset).srl32<1>() + offset; //fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y); v[i].XYZ.Y = static_cast(tmp.x); @@ -1472,7 +1472,7 @@ void GSRendererHW::SwSpriteRender() // Apply TFX pxAssert(tex0_tfx == 0 || tex0_tfx == 1); if (tex0_tfx == 0) - sc = sc.mul16l(vc).srl16(7).clamp8(); // clamp((sc * vc) >> 7, 0, 255), srl16 is ok because 16 bit values are unsigned + sc = sc.mul16l(vc).srl16<7>().clamp8(); // clamp((sc * vc) >> 7, 0, 255), srl16 is ok because 16 bit values are unsigned if (tex0_tcc == 0) sc = sc.blend(vc, a_mask); @@ -1502,7 +1502,7 @@ void GSRendererHW::SwSpriteRender() .ps32() // 0x00AA00AA00aa00aa00AA00AA00aa00aa .xxyy(); // 0x00AA00AA00AA00AA00aa00aa00aa00aa const GSVector4i D = alpha_d == 0 ? sc : alpha_d == 1 ? dc0 : GSVector4i::zero(); - dc = A.sub16(B).mul16l(C).sra16(7).add16(D); // (((A - B) * C) >> 7) + D, must use sra16 due to signed 16 bit values. + dc = A.sub16(B).mul16l(C).sra16<7>().add16(D); // (((A - B) * C) >> 7) + D, must use sra16 due to signed 16 bit values. 
// dc alpha channels (dc.u16[3], dc.u16[7]) dirty } else @@ -1514,7 +1514,7 @@ void GSRendererHW::SwSpriteRender() if (m_draw_env->COLCLAMP.CLAMP) dc = dc.clamp8(); // clamp(dc, 0, 255) else - dc = dc.sll16(8).srl16(8); // Mask, lower 8 bits enabled per channel + dc = dc.sll16<8>().srl16<8>(); // Mask, lower 8 bits enabled per channel // No Alpha Correction pxAssert(m_context->FBA.FBA == 0); @@ -6535,8 +6535,8 @@ bool GSRendererHW::IsReallyDithered() const void GSRendererHW::ReplaceVerticesWithSprite(const GSVector4i& unscaled_rect, const GSVector4i& unscaled_uv_rect, const GSVector2i& unscaled_size, const GSVector4i& scissor) { - const GSVector4i fpr = unscaled_rect.sll32(4); - const GSVector4i fpuv = unscaled_uv_rect.sll32(4); + const GSVector4i fpr = unscaled_rect.sll32<4>(); + const GSVector4i fpuv = unscaled_uv_rect.sll32<4>(); GSVertex* v = m_vertex.buff; v[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + fpr.x); @@ -6615,7 +6615,7 @@ GSHWDrawConfig& GSRendererHW::BeginHLEHardwareDraw( vertices[i].V = v; \ } while (0) - const GSVector4i fp_rect = unscaled_rect.sll32(4); + const GSVector4i fp_rect = unscaled_rect.sll32<4>(); V(0, fp_rect.x, fp_rect.y, fp_rect.x, fp_rect.y); // top-left V(1, fp_rect.z, fp_rect.y, fp_rect.z, fp_rect.y); // top-right V(2, fp_rect.x, fp_rect.w, fp_rect.x, fp_rect.w); // bottom-left diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp index 05d6b40a077df1..efe8f3bc59965c 100644 --- a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp +++ b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp @@ -323,7 +323,7 @@ void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u16* index, cons c = c.upl16(c.zwxy()); if (sel.tfx == TFX_NONE) - c = c.srl16(7); + c = c.srl16<7>(); local.c.rb = c.xxxx(); local.c.ga = c.zzzz(); @@ -516,9 +516,9 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV if (sel.edge) { #if _M_SSE >= 0x501 - cov = 
GSVector8i::broadcast16(GSVector4i::cast(scan.p)).srl16(9); + cov = GSVector8i::broadcast16(GSVector4i::cast(scan.p)).srl16<9>(); #else - cov = GSVector4i::cast(scan.p).xxxxl().xxxx().srl16(9); + cov = GSVector4i::cast(scan.p).xxxxl().xxxx().srl16<9>(); #endif } @@ -537,7 +537,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV } else if (sel.ltf) { - vf = v.xxzzlh().srl16(12); + vf = v.xxzzlh().srl16<12>(); } s = VectorF::cast(u); @@ -649,8 +649,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV switch (sel.zpsm) { - case 1: zdo = zdo.sll32( 8).srl32( 8); break; - case 2: zdo = zdo.sll32(16).srl32(16); break; + case 1: zdo = zdo.sll32< 8>().srl32<8>(); break; + case 2: zdo = zdo.sll32<16>().srl32<16>(); break; default: break; } @@ -705,7 +705,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV lod += 0x8000; } - lodi = lod.srl32(16); + lodi = lod.srl32<16>(); if (sel.mmin == 2) // trilinear mode { @@ -787,11 +787,11 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV u -= 0x8000; v -= 0x8000; - uf = u.xxzzlh().srl16(12); - vf = v.xxzzlh().srl16(12); + uf = u.xxzzlh().srl16<12>(); + vf = v.xxzzlh().srl16<12>(); } - VectorI uv0 = u.sra32(16).ps32(v.sra32(16)); + VectorI uv0 = u.sra32<16>().ps32(v.sra32<16>()); VectorI uv1 = uv0; { @@ -849,18 +849,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV } } - VectorI rb00 = c00.sll16(8).srl16(8); - VectorI ga00 = c00.srl16(8); - VectorI rb01 = c01.sll16(8).srl16(8); - VectorI ga01 = c01.srl16(8); + VectorI rb00 = c00.sll16<8>().srl16<8>(); + VectorI ga00 = c00.srl16<8>(); + VectorI rb01 = c01.sll16<8>().srl16<8>(); + VectorI ga01 = c01.srl16<8>(); rb00 = rb00.lerp16_4(rb01, uf); ga00 = ga00.lerp16_4(ga01, uf); - VectorI rb10 = c10.sll16(8).srl16(8); - VectorI ga10 = c10.srl16(8); - VectorI rb11 = c11.sll16(8).srl16(8); - VectorI ga11 = c11.srl16(8); + 
VectorI rb10 = c10.sll16<8>().srl16<8>(); + VectorI ga10 = c10.srl16<8>(); + VectorI rb11 = c11.sll16<8>().srl16<8>(); + VectorI ga11 = c11.srl16<8>(); rb10 = rb10.lerp16_4(rb11, uf); ga10 = ga10.lerp16_4(ga11, uf); @@ -887,8 +887,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV } } - rb = c00.sll16(8).srl16(8); - ga = c00.srl16(8); + rb = c00.sll16<8>().srl16<8>(); + ga = c00.srl16<8>(); } if (sel.mmin != 1) // !round-off mode @@ -897,22 +897,22 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV lodi += VectorI::x00000001(); - u = uv[0].sra32(1); - v = uv[1].sra32(1); + u = uv[0].sra32<1>(); + v = uv[1].sra32<1>(); - minuv = minuv.srl16(1); - maxuv = maxuv.srl16(1); + minuv = minuv.srl16<1>(); + maxuv = maxuv.srl16<1>(); if (sel.ltf) { u -= 0x8000; v -= 0x8000; - uf = u.xxzzlh().srl16(12); - vf = v.xxzzlh().srl16(12); + uf = u.xxzzlh().srl16<12>(); + vf = v.xxzzlh().srl16<12>(); } - VectorI uv0 = u.sra32(16).ps32(v.sra32(16)); + VectorI uv0 = u.sra32<16>().ps32(v.sra32<16>()); VectorI uv1 = uv0; { @@ -970,18 +970,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV } } - VectorI rb00 = c00.sll16(8).srl16(8); - VectorI ga00 = c00.srl16(8); - VectorI rb01 = c01.sll16(8).srl16(8); - VectorI ga01 = c01.srl16(8); + VectorI rb00 = c00.sll16<8>().srl16<8>(); + VectorI ga00 = c00.srl16<8>(); + VectorI rb01 = c01.sll16<8>().srl16<8>(); + VectorI ga01 = c01.srl16<8>(); rb00 = rb00.lerp16_4(rb01, uf); ga00 = ga00.lerp16_4(ga01, uf); - VectorI rb10 = c10.sll16(8).srl16(8); - VectorI ga10 = c10.srl16(8); - VectorI rb11 = c11.sll16(8).srl16(8); - VectorI ga11 = c11.srl16(8); + VectorI rb10 = c10.sll16<8>().srl16<8>(); + VectorI ga10 = c10.srl16<8>(); + VectorI rb11 = c11.sll16<8>().srl16<8>(); + VectorI ga11 = c11.srl16<8>(); rb10 = rb10.lerp16_4(rb11, uf); ga10 = ga10.lerp16_4(ga11, uf); @@ -1008,14 +1008,14 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, 
int top, const GSV } } - rb2 = c00.sll16(8).srl16(8); - ga2 = c00.srl16(8); + rb2 = c00.sll16<8>().srl16<8>(); + ga2 = c00.srl16<8>(); } if (sel.lcm) lodf = global.lod.f; - lodf = lodf.srl16(1); + lodf = lodf.srl16<1>(); rb = rb.lerp16<0>(rb2, lodf); ga = ga.lerp16<0>(ga2, lodf); @@ -1042,15 +1042,15 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV if (sel.ltf) { - uf = u.xxzzlh().srl16(12); + uf = u.xxzzlh().srl16<12>(); if (sel.prim != GS_SPRITE_CLASS) { - vf = v.xxzzlh().srl16(12); + vf = v.xxzzlh().srl16<12>(); } } - VectorI uv0 = u.sra32(16).ps32(v.sra32(16)); + VectorI uv0 = u.sra32<16>().ps32(v.sra32<16>()); VectorI uv1 = uv0; VectorI tmin = VectorI::broadcast128(global.t.min); @@ -1105,18 +1105,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV c11 = addr11.gather32_32(tex); } - VectorI rb00 = c00.sll16(8).srl16(8); - VectorI ga00 = c00.srl16(8); - VectorI rb01 = c01.sll16(8).srl16(8); - VectorI ga01 = c01.srl16(8); + VectorI rb00 = c00.sll16<8>().srl16<8>(); + VectorI ga00 = c00.srl16<8>(); + VectorI rb01 = c01.sll16<8>().srl16<8>(); + VectorI ga01 = c01.srl16<8>(); rb00 = rb00.lerp16_4(rb01, uf); ga00 = ga00.lerp16_4(ga01, uf); - VectorI rb10 = c10.sll16(8).srl16(8); - VectorI ga10 = c10.srl16(8); - VectorI rb11 = c11.sll16(8).srl16(8); - VectorI ga11 = c11.srl16(8); + VectorI rb10 = c10.sll16<8>().srl16<8>(); + VectorI ga10 = c10.srl16<8>(); + VectorI rb11 = c11.sll16<8>().srl16<8>(); + VectorI ga11 = c11.srl16<8>(); rb10 = rb10.lerp16_4(rb11, uf); ga10 = ga10.lerp16_4(ga11, uf); @@ -1137,8 +1137,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV c00 = addr00.gather32_32((const u32*)global.tex[0]); } - rb = c00.sll16(8).srl16(8); - ga = c00.srl16(8); + rb = c00.sll16<8>().srl16<8>(); + ga = c00.srl16<8>(); } } } @@ -1152,21 +1152,21 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV case TFX_MODULATE: ga = 
ga.modulate16<1>(gaf).clamp8(); if (!sel.tcc) - ga = ga.mix16(gaf.srl16(7)); + ga = ga.mix16(gaf.srl16<7>()); break; case TFX_DECAL: if (!sel.tcc) - ga = ga.mix16(gaf.srl16(7)); + ga = ga.mix16(gaf.srl16<7>()); break; case TFX_HIGHLIGHT: - ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); + ga = ga.mix16(!sel.tcc ? gaf.srl16<7>() : ga.addus8(gaf.srl16<7>())); break; case TFX_HIGHLIGHT2: if (!sel.tcc) - ga = ga.mix16(gaf.srl16(7)); + ga = ga.mix16(gaf.srl16<7>()); break; case TFX_NONE: - ga = sel.iip ? gaf.srl16(7) : gaf; + ga = sel.iip ? gaf.srl16<7>() : gaf; break; } @@ -1182,7 +1182,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV } else { - ga = ga.blend8(a, ga.eq16(x00800080).srl32(16).sll32(16)); + ga = ga.blend8(a, ga.eq16(x00800080).srl32<16>().sll32<16>()); } } } @@ -1219,12 +1219,12 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV break; case TFX_HIGHLIGHT: case TFX_HIGHLIGHT2: - af = gaf.yywwlh().srl16(7); + af = gaf.yywwlh().srl16<7>(); rb = rb.modulate16<1>(rbf).add16(af).clamp8(); ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga); break; case TFX_NONE: - rb = sel.iip ? rbf.srl16(7) : rbf; + rb = sel.iip ? 
rbf.srl16<7>() : rbf; break; } } @@ -1249,12 +1249,12 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV ga = fga.lerp16<0>(ga, fog).mix16(ga); /* - fog = fog.srl16(7); + fog = fog.srl16<7>(); VectorI ifog = VectorI::x00ff().sub16(fog); - rb = rb.mul16l(fog).add16(frb.mul16l(ifog)).srl16(8); - ga = ga.mul16l(fog).add16(fga.mul16l(ifog)).srl16(8).mix16(ga); + rb = rb.mul16l(fog).add16(frb.mul16l(ifog)).srl16<8>(); + ga = ga.mul16l(fog).add16(fga.mul16l(ifog)).srl16<8>().mix16(ga); */ } @@ -1285,22 +1285,22 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV if (sel.fpsm == 2) { // test |= fd.srl32(15) == VectorI::zero(); - test |= fd.sll32(16).sra32(31) == VectorI::zero(); + test |= fd.sll32<16>().sra32<31>() == VectorI::zero(); } else { - test |= (~fd).sra32(31); + test |= (~fd).sra32<31>(); } } else { if (sel.fpsm == 2) { - test |= fd.sll32(16).sra32(31); // == VectorI::xffffffff(); + test |= fd.sll32<16>().sra32<31>(); // == VectorI::xffffffff(); } else { - test |= fd.sra32(31); + test |= fd.sra32<31>(); } } @@ -1419,8 +1419,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV { case 0: case 1: - rbd = fd.sll16(8).srl16(8); - gad = fd.srl16(8); + rbd = fd.sll16<8>().srl16<8>(); + gad = fd.srl16<8>(); break; case 2: rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); @@ -1449,8 +1449,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV { switch(sel.abc) { - case 0: a = gas.yywwlh().sll16(7); break; - case 1: a = gad.yywwlh().sll16(7); break; + case 0: a = gas.yywwlh().sll16<7>(); break; + case 1: a = gad.yywwlh().sll16<7>(); break; case 2: a = global.afix; break; } @@ -1476,7 +1476,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV if (sel.pabe) { - mask = (gas << 8).sra32(31); + mask = (gas << 8).sra32<31>(); rb = rbs.blend8(rb, mask); }