From 3fff79d81b841482f3bde82dde8107b4e961325a Mon Sep 17 00:00:00 2001 From: RealTimeChris <40668522+RealTimeChris@users.noreply.github.com> Date: Sun, 17 Sep 2023 19:42:58 -0400 Subject: [PATCH 1/2] fix/voice: Updating to fix the clamping behavior of the AVX-512 implementation. (#861) --- include/dpp/isa_detection.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/include/dpp/isa_detection.h b/include/dpp/isa_detection.h index 233baedb7f..0c5fe7b398 100644 --- a/include/dpp/isa_detection.h +++ b/include/dpp/isa_detection.h @@ -155,11 +155,11 @@ namespace dpp { * @return An AVX512 register containing gathered values. */ template inline static avx_512_float gather_values(value_type* values) { - float new_array[byte_blocks_per_register]{}; + alignas(64) float new_array[byte_blocks_per_register]{}; for (size_t x = 0; x < byte_blocks_per_register; ++x) { new_array[x] = static_cast(values[x]); } - return _mm512_loadu_ps(new_array); + return _mm512_load_ps(new_array); } /** @@ -177,9 +177,13 @@ namespace dpp { _mm512_mul_ps(_mm512_set1_ps(increment), _mm512_set_ps(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f)))) }; - current_samples_new = _mm512_mask_blend_ps(_mm512_cmp_ps_mask(current_samples_new, _mm512_set1_ps(0.0f), _CMP_GE_OQ), - _mm512_max_ps(current_samples_new, _mm512_set1_ps(static_cast(std::numeric_limits::min()))), - _mm512_min_ps(current_samples_new, _mm512_set1_ps(static_cast(std::numeric_limits::max())))); + __m512 lower_limit = _mm512_set1_ps(static_cast(std::numeric_limits::min())); + __m512 upper_limit = _mm512_set1_ps(static_cast(std::numeric_limits::max())); + + __mmask16 mask_ge = _mm512_cmp_ps_mask(current_samples_new, _mm512_set1_ps(0.0f), _CMP_GE_OQ); + + current_samples_new = _mm512_mask_max_ps(current_samples_new, mask_ge, current_samples_new, lower_limit); + current_samples_new = _mm512_mask_min_ps(current_samples_new, ~mask_ge, current_samples_new, upper_limit); store_values(_mm512_cvtps_epi32(current_samples_new), data_out); } @@ -228,11 +232,11 @@ namespace dpp { * @return An AVX2 register containing gathered values. */ template inline static avx_2_float gather_values(value_type* values) { - float new_array[byte_blocks_per_register]{}; + alignas(32) float new_array[byte_blocks_per_register]{}; for (size_t x = 0; x < byte_blocks_per_register; ++x) { new_array[x] = static_cast(values[x]); } - return _mm256_loadu_ps(new_array); + return _mm256_load_ps(new_array); } /** @@ -302,11 +306,11 @@ namespace dpp { * @return An AVX register containing gathered values. */ template inline static avx_float gather_values(value_type* values) { - float new_array[byte_blocks_per_register]{}; + alignas(16) float new_array[byte_blocks_per_register]{}; for (size_t x = 0; x < byte_blocks_per_register; ++x) { new_array[x] = static_cast(values[x]); } - return _mm_loadu_ps(new_array); + return _mm_load_ps(new_array); } /** @@ -365,8 +369,8 @@ namespace dpp { */ inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) { for (uint64_t x = 0; x < byte_blocks_per_register; ++x) { - auto increment_neww = increment * x; - auto current_gain_new = current_gain + increment_neww; + auto increment_new = increment * x; + auto current_gain_new = current_gain + increment_new; auto current_sample_new = data_in[x] * current_gain_new; if (current_sample_new >= std::numeric_limits::max()) { current_sample_new = std::numeric_limits::max(); From c79da97252e389a6cbd6ca4d4f6e2e868c870c5e Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 19 Sep 2023 20:05:58 +0300 Subject: [PATCH 2/2] fix(cmake): fix include_directories for installed target (#863) --- library-vcpkg/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/library-vcpkg/CMakeLists.txt b/library-vcpkg/CMakeLists.txt index a8377cf699..a06ecf8163 100644 --- a/library-vcpkg/CMakeLists.txt +++ b/library-vcpkg/CMakeLists.txt @@ -47,8 +47,9 @@ target_compile_features( ) target_include_directories( - "${LIB_NAME}" PRIVATE + "${LIB_NAME}" PUBLIC "$" + "$" ) set_target_properties(