Skip to content

Commit

Permalink
EEDI3/vectorclass/vectorf128.h, vectorf256.h: remove use of _mm_rsqrt14_ps, _mm256_rsqrt14_ps
Browse files Browse the repository at this point in the history

For some reason, MSVC does not appear to support these intrinsics in /arch:AVX512 mode.

Signed-off-by: akarin <[email protected]>
  • Loading branch information
AkarinVS committed Nov 25, 2021
1 parent 2e43d66 commit bab3c69
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
18 changes: 9 additions & 9 deletions EEDI3/vectorclass/vectorf128.h
Original file line number Diff line number Diff line change
Expand Up @@ -1078,15 +1078,15 @@ static inline Vec4f approx_recipr(Vec4f const & a) {
// Approximate reciprocal square root of a (faster than 1.f / sqrt(a); relative accuracy better than 2^-11).
// The intrinsic is selected at compile time:
//   INSTRSET >= 9 and __AVX512ER__ defined : _mm512_rsqrt28_round_ps on a 512-bit cast of a
//   INSTRSET >= 9 and __AVX512VL__ defined : _mm_rsqrt14_ps applied directly to a
//   INSTRSET >= 9 otherwise                : _mm512_rsqrt14_ps on a 512-bit cast of a
//   INSTRSET <  9                          : _mm_rsqrt_ps
// The 512-bit paths cast a up to a 512-bit vector and cast the result back down to 128 bits.
static inline Vec4f approx_rsqrt(Vec4f const & a) {
#if INSTRSET >= 9 // use more accurate version if available. (none of these will raise exceptions on zero)
#ifdef __AVX512ER__ // AVX512ER: full precision
// todo: if future processors have both AVX512ER and AVX512VL: _mm128_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC);
return _mm512_castps512_ps128(_mm512_rsqrt28_round_ps(_mm512_castps128_ps512(a), _MM_FROUND_NO_EXC));
#elif defined __AVX512VL__ // AVX512VL: 14 bit precision
return _mm_rsqrt14_ps(a);
#else // AVX512F: 14 bit precision
return _mm512_castps512_ps128(_mm512_rsqrt14_ps(_mm512_castps128_ps512(a)));
#endif
#else // AVX: 11 bit precision
// NOTE(review): the commented-out lines below repeat the active AVX512 branches above.
// Presumably only one of the two copies is meant to remain - confirm against the intended state of the header.
//#ifdef __AVX512ER__ // AVX512ER: full precision
// // todo: if future processors have both AVX512ER and AVX512VL: _mm128_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC);
// return _mm512_castps512_ps128(_mm512_rsqrt28_round_ps(_mm512_castps128_ps512(a), _MM_FROUND_NO_EXC));
//#elif defined __AVX512VL__ // AVX512VL: 14 bit precision
// return _mm_rsqrt14_ps(a);
//#else // AVX512F: 14 bit precision
// return _mm512_castps512_ps128(_mm512_rsqrt14_ps(_mm512_castps128_ps512(a)));
//#endif
//#else // AVX: 11 bit precision
return _mm_rsqrt_ps(a);
#endif
}
Expand Down
18 changes: 9 additions & 9 deletions EEDI3/vectorclass/vectorf256.h
Original file line number Diff line number Diff line change
Expand Up @@ -1166,15 +1166,15 @@ static inline Vec8f approx_recipr(Vec8f const & a) {
// Approximate reciprocal square root of a (faster than 1.f / sqrt(a); relative accuracy better than 2^-11).
// The intrinsic is selected at compile time:
//   INSTRSET >= 9 and __AVX512ER__ defined : _mm512_rsqrt28_round_ps on a 512-bit cast of a
//   INSTRSET >= 9 and __AVX512VL__ defined : _mm256_rsqrt14_ps applied directly to a
//   INSTRSET >= 9 otherwise                : _mm512_rsqrt14_ps on a 512-bit cast of a
//   INSTRSET <  9                          : _mm256_rsqrt_ps
// The 512-bit paths cast a up to a 512-bit vector and cast the result back down to 256 bits.
static inline Vec8f approx_rsqrt(Vec8f const & a) {
#if INSTRSET >= 9 // use more accurate version if available. (none of these will raise exceptions on zero)
#ifdef __AVX512ER__ // AVX512ER: full precision
// todo: if future processors have both AVX512ER and AVX512VL: _mm256_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC);
return _mm512_castps512_ps256(_mm512_rsqrt28_round_ps(_mm512_castps256_ps512(a), _MM_FROUND_NO_EXC));
#elif defined __AVX512VL__ // AVX512VL: 14 bit precision
return _mm256_rsqrt14_ps(a);
#else // AVX512F: 14 bit precision
return _mm512_castps512_ps256(_mm512_rsqrt14_ps(_mm512_castps256_ps512(a)));
#endif
#else // AVX: 11 bit precision
// NOTE(review): the commented-out lines below repeat the active AVX512 branches above.
// Presumably only one of the two copies is meant to remain - confirm against the intended state of the header.
//#ifdef __AVX512ER__ // AVX512ER: full precision
// // todo: if future processors have both AVX512ER and AVX512VL: _mm256_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC);
// return _mm512_castps512_ps256(_mm512_rsqrt28_round_ps(_mm512_castps256_ps512(a), _MM_FROUND_NO_EXC));
//#elif defined __AVX512VL__ // AVX512VL: 14 bit precision
// return _mm256_rsqrt14_ps(a);
//#else // AVX512F: 14 bit precision
// return _mm512_castps512_ps256(_mm512_rsqrt14_ps(_mm512_castps256_ps512(a)));
//#endif
//#else // AVX: 11 bit precision
return _mm256_rsqrt_ps(a);
#endif
}
Expand Down

0 comments on commit bab3c69

Please sign in to comment.