Skip to content

Commit

Permalink
EE Cache: Let's blow the roof off PCSX2
Browse files Browse the repository at this point in the history
  • Loading branch information
F0bes committed Dec 23, 2024
1 parent ccaf224 commit 3fb5be9
Showing 1 changed file with 61 additions and 6 deletions.
67 changes: 61 additions & 6 deletions pcsx2/vtlb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,50 +119,105 @@ __noinline int CheckCache(u32 addr)
}

const size_t size = cachedTlbs.count;
const int stride = 4;

__m128i addr_vec = _mm_set1_epi32(addr);
const int stride = 8;

//__m128i addr_vec = _mm_set1_epi32(addr);
__m256i addr_vec = _mm256_set1_epi32(addr);
size_t i = 0;

for (; i + stride <= size; i += stride)
{
/*
__m128i pfn1_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.PFN1s[i]));
__m128i pfn0_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.PFN0s[i]));
__m128i mask_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.PageMasks[i]));
*/

__m256i pfn1_vec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&cachedTlbs.PFN1s[i]));
__m256i pfn0_vec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&cachedTlbs.PFN0s[i]));
__m256i mask_vec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&cachedTlbs.PageMasks[i]));

/*
__m128i cached1_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.CacheEnabled1[i]));
__m128i cached0_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.CacheEnabled0[i]));
*/

__m256i cached1_vec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&cachedTlbs.CacheEnabled1[i]));
__m256i cached0_vec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&cachedTlbs.CacheEnabled0[i]));

/*
__m128i pfn1_end_vec = _mm_add_epi32(pfn1_vec, mask_vec);
__m128i pfn0_end_vec = _mm_add_epi32(pfn0_vec, mask_vec);
*/

__m256i pfn1_end_vec = _mm256_add_epi32(pfn1_vec, mask_vec);
__m256i pfn0_end_vec = _mm256_add_epi32(pfn0_vec, mask_vec);

/*
__m128i gteLowerBound0 = _mm_or_si128(
_mm_cmpgt_epi32(addr_vec, pfn0_vec),
_mm_cmpeq_epi32(addr_vec, pfn0_vec));
__m128i gteUpperBound0 = _mm_or_si128(
_mm_cmpgt_epi32(pfn0_end_vec, addr_vec),
_mm_cmpeq_epi32(pfn0_end_vec, addr_vec));
*/

__m256i gteLowerBound0 = _mm256_or_si256(
_mm256_cmpgt_epi32(addr_vec, pfn0_vec),
_mm256_cmpeq_epi32(addr_vec, pfn0_vec));

__m256i gteUpperBound0 = _mm256_or_si256(
_mm256_cmpgt_epi32(pfn0_end_vec, addr_vec),
_mm256_cmpeq_epi32(pfn0_end_vec, addr_vec));

/*
__m128i gteUpperBound1 = _mm_or_si128(
_mm_cmpgt_epi32(pfn1_end_vec, addr_vec),
_mm_cmpeq_epi32(pfn1_end_vec, addr_vec));
__m128i gteLowerBound1 = _mm_or_si128(
_mm_cmpgt_epi32(addr_vec, pfn1_vec),
_mm_cmpeq_epi32(addr_vec, pfn1_vec));
*/

__m256i gteUpperBound1 = _mm256_or_si256(
_mm256_cmpgt_epi32(pfn1_end_vec, addr_vec),
_mm256_cmpeq_epi32(pfn1_end_vec, addr_vec));

__m256i gteLowerBound1 = _mm256_or_si256(
_mm256_cmpgt_epi32(addr_vec, pfn1_vec),
_mm256_cmpeq_epi32(addr_vec, pfn1_vec));

/*
__m128i cmp0 = _mm_and_si128(gteLowerBound0, gteUpperBound0);
__m128i cmp1 = _mm_and_si128(gteLowerBound1, gteUpperBound1);
*/

__m256i cmp0 = _mm256_and_si256(gteLowerBound0, gteUpperBound0);
__m256i cmp1 = _mm256_and_si256(gteLowerBound1, gteUpperBound1);

cmp1 = _mm_and_si128(cmp1, cached1_vec);
cmp0 = _mm_and_si128(cmp0, cached0_vec);

__m128i cmp = _mm_or_si128(cmp1, cmp0);
//cmp1 = _mm_and_si128(cmp1, cached1_vec);
//cmp0 = _mm_and_si128(cmp0, cached0_vec);

cmp1 = _mm256_and_si256(cmp1, cached1_vec);
cmp0 = _mm256_and_si256(cmp0, cached0_vec);

//__m128i cmp = _mm_or_si128(cmp1, cmp0);

__m256i cmp = _mm256_or_si256(cmp1, cmp0);
//__m128i cmp = _mm_or_si128(cmp1, cmp0);

/*
if (!_mm_testz_si128(cmp, cmp))
{
return true;
}
*/

if (!_mm256_testz_si256(cmp, cmp))
{
return true;
}
}

for (; i < size; i++)
Expand Down

0 comments on commit 3fb5be9

Please sign in to comment.