x86/sse: add SIMDE_MM_ROUND_MASK, use uint32_t for the rounding-mode helpers, test MXCSR

  * Define SIMDE_MM_ROUND_MASK and, when native aliases are enabled and the
    name is not already taken, the _MM_ROUND_MASK alias.  The extra
    !defined() check keeps the alias from becoming self-referential when
    SIMDE_MM_ROUND_MASK itself expands to a pre-existing _MM_ROUND_MASK.
  * SIMDE_MM_GET_ROUNDING_MODE returns uint32_t and
    SIMDE_MM_SET_ROUNDING_MODE takes uint32_t instead of unsigned int.
  * The alias that follows SIMDE_MM_GET_FLUSH_ZERO_MODE now names
    _MM_GET_FLUSH_ZERO_MODE, and takes no parameter because the function it
    forwards to is declared (void).
  * In the non-native branch, simde_mm_setcsr masks its argument with
    SIMDE_MM_ROUND_MASK before handing it to SIMDE_MM_SET_ROUNDING_MODE.
  * Add test_simde_MXCSR and register it in the test list.

diff --git a/simde/x86/sse.h b/simde/x86/sse.h
index 8e8532c9d..1b98bf695 100644
--- a/simde/x86/sse.h
+++ b/simde/x86/sse.h
@@ -262,6 +262,14 @@ enum {
     SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000
   #endif
 };
+#if defined(_MM_ROUND_MASK)
+#  define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK
+#else
+#  define SIMDE_MM_ROUND_MASK (0x6000)
+#endif
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) && !defined(_MM_ROUND_MASK) /* never alias a name onto itself */
+  #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK
+#endif
 
 #if defined(_MM_FROUND_TO_NEAREST_INT)
 #  define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
@@ -429,7 +437,7 @@ enum {
 #endif
 
 SIMDE_FUNCTION_ATTRIBUTES
-unsigned int
+uint32_t
 SIMDE_MM_GET_ROUNDING_MODE(void) {
   #if defined(SIMDE_X86_SSE_NATIVE)
     return _MM_GET_ROUNDING_MODE();
@@ -477,7 +485,7 @@ SIMDE_MM_GET_ROUNDING_MODE(void) {
 
 SIMDE_FUNCTION_ATTRIBUTES
 void
-SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) {
+SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) {
   #if defined(SIMDE_X86_SSE_NATIVE)
     _MM_SET_ROUNDING_MODE(a);
   #elif defined(SIMDE_HAVE_FENV_H)
@@ -531,7 +539,7 @@ SIMDE_MM_GET_FLUSH_ZERO_MODE (void) {
   #endif
 }
 #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-  #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a)
+  #define _MM_GET_FLUSH_ZERO_MODE() SIMDE_MM_GET_FLUSH_ZERO_MODE()
 #endif
 
 SIMDE_FUNCTION_ATTRIBUTES
@@ -566,7 +574,7 @@ simde_mm_setcsr (uint32_t a) {
   #if defined(SIMDE_X86_SSE_NATIVE)
     _mm_setcsr(a);
   #else
-    SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a));
+    SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK));
   #endif
 }
 #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
diff --git a/test/x86/sse.c b/test/x86/sse.c
index 828cb0048..60bc36eb5 100644
--- a/test/x86/sse.c
+++ b/test/x86/sse.c
@@ -5766,6 +5766,72 @@ test_simde_MM_TRANSPOSE4_PS (SIMDE_MUNIT_TEST_ARGS) {
 #endif
 }
 
+/* Drive simde_mm_setcsr() through every rounding mode with flush-to-zero
+ * both off and on, reading the rounding mode and the flush-to-zero mode
+ * back after each write.  The rounding mode read back must not depend on
+ * the flush-to-zero bit, and the flush-to-zero mode read back must not
+ * depend on the rounding mode.  MXCSR is restored before any assertion. */
+static int
+test_simde_MXCSR (SIMDE_MUNIT_TEST_ARGS) {
+  uint32_t original_mxcsr = simde_mm_getcsr();
+  uint32_t mask_rm_fzm = SIMDE_MM_ROUND_MASK | SIMDE_MM_FLUSH_ZERO_MASK;
+  uint32_t masked_mxcsr = original_mxcsr & ~mask_rm_fzm;
+
+  uint32_t rm_nearest_off, fzm_nearest_off, rm_nearest_on, fzm_nearest_on;
+  uint32_t rm_down_off, fzm_down_off, rm_down_on, fzm_down_on;
+  uint32_t rm_up_off, fzm_up_off, rm_up_on, fzm_up_on;
+  uint32_t rm_zero_off, fzm_zero_off, rm_zero_on, fzm_zero_on;
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_NEAREST | SIMDE_MM_FLUSH_ZERO_OFF);
+  rm_nearest_off = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_nearest_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_NEAREST | SIMDE_MM_FLUSH_ZERO_ON);
+  rm_nearest_on = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_nearest_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_DOWN | SIMDE_MM_FLUSH_ZERO_OFF);
+  rm_down_off = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_down_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_DOWN | SIMDE_MM_FLUSH_ZERO_ON);
+  rm_down_on = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_down_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_UP | SIMDE_MM_FLUSH_ZERO_OFF);
+  rm_up_off = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_up_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_UP | SIMDE_MM_FLUSH_ZERO_ON);
+  rm_up_on = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_up_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_TOWARD_ZERO | SIMDE_MM_FLUSH_ZERO_OFF);
+  rm_zero_off = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_zero_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_TOWARD_ZERO | SIMDE_MM_FLUSH_ZERO_ON);
+  rm_zero_on = SIMDE_MM_GET_ROUNDING_MODE();
+  fzm_zero_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();
+
+  simde_mm_setcsr(original_mxcsr);
+
+  simde_assert_equal_u32(rm_nearest_off, rm_nearest_on);
+  simde_assert_equal_u32(rm_down_off, rm_down_on);
+  simde_assert_equal_u32(rm_up_off, rm_up_on);
+  simde_assert_equal_u32(rm_zero_off, rm_zero_on);
+
+  simde_assert_equal_u32(fzm_nearest_off, fzm_down_off);
+  simde_assert_equal_u32(fzm_nearest_off, fzm_up_off);
+  simde_assert_equal_u32(fzm_nearest_off, fzm_zero_off);
+
+  simde_assert_equal_u32(fzm_nearest_on, fzm_down_on);
+  simde_assert_equal_u32(fzm_nearest_on, fzm_up_on);
+  simde_assert_equal_u32(fzm_nearest_on, fzm_zero_on);
+
+  return 0;
+}
+
 SIMDE_TEST_FUNC_LIST_BEGIN
   SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_ps)
   SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_ps1)
@@ -5915,6 +5981,7 @@ SIMDE_TEST_FUNC_LIST_BEGIN
   SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_ps)
   SIMDE_TEST_FUNC_LIST_ENTRY(mm_prefetch)
   SIMDE_TEST_FUNC_LIST_ENTRY(MM_TRANSPOSE4_PS)
+  SIMDE_TEST_FUNC_LIST_ENTRY(MXCSR)
 SIMDE_TEST_FUNC_LIST_END
 
 #include