diff --git a/include/dpp/isa_detection.h b/include/dpp/isa_detection.h index 233baedb7f..a3057c3fd3 100644 --- a/include/dpp/isa_detection.h +++ b/include/dpp/isa_detection.h @@ -177,9 +177,13 @@ namespace dpp { _mm512_mul_ps(_mm512_set1_ps(increment), _mm512_set_ps(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f)))) }; - current_samples_new = _mm512_mask_blend_ps(_mm512_cmp_ps_mask(current_samples_new, _mm512_set1_ps(0.0f), _CMP_GE_OQ), - _mm512_max_ps(current_samples_new, _mm512_set1_ps(static_cast(std::numeric_limits::min()))), - _mm512_min_ps(current_samples_new, _mm512_set1_ps(static_cast(std::numeric_limits::max())))); + __m512 lower_limit = _mm512_set1_ps(static_cast(std::numeric_limits::min())); + __m512 upper_limit = _mm512_set1_ps(static_cast(std::numeric_limits::max())); + + __mmask16 mask_ge = _mm512_cmp_ps_mask(current_samples_new, _mm512_set1_ps(0.0f), _CMP_GE_OQ); + + current_samples_new = _mm512_mask_max_ps(current_samples_new, mask_ge, current_samples_new, lower_limit); + current_samples_new = _mm512_mask_min_ps(current_samples_new, ~mask_ge, current_samples_new, upper_limit); store_values(_mm512_cvtps_epi32(current_samples_new), data_out); } @@ -365,8 +369,8 @@ namespace dpp { */ inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) { for (uint64_t x = 0; x < byte_blocks_per_register; ++x) { - auto increment_neww = increment * x; - auto current_gain_new = current_gain + increment_neww; + auto increment_new = increment * x; + auto current_gain_new = current_gain + increment_new; auto current_sample_new = data_in[x] * current_gain_new; if (current_sample_new >= std::numeric_limits::max()) { current_sample_new = std::numeric_limits::max();