From cd626fc20d2bfd4673dbb7a7008b705fb5cc4992 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Thu, 4 May 2023 22:15:44 +0200 Subject: [PATCH 1/2] run test from #926 Co-authored-by: Thomas Daede --- test/x86/avx.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/x86/avx.c b/test/x86/avx.c index 5bea8acd9..b8a59373c 100644 --- a/test/x86/avx.c +++ b/test/x86/avx.c @@ -13795,9 +13795,12 @@ test_simde_mm256_shuffle_pd(SIMDE_MUNIT_TEST_ARGS) { simde_mm256_set_pd(SIMDE_FLOAT64_C( -365.57), SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( 333.70), SIMDE_FLOAT64_C( 26.68)) } }; + uint64_t a[] = {1, 0, 0, 0}; + uint64_t b[] = {0, 0, 1, 0}; + int64_t target[4] = {INT64_C(1), INT64_C(0), INT64_C(0), INT64_C(0) }; + simde__m256d r, tmp_0_yd, tmp_1_yd; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { - simde__m256d r; r = simde_mm256_shuffle_pd(test_vec[i].a, test_vec[i].b, 0x5); simde_assert_m256d_close(r, test_vec[i].r1, 1); @@ -13806,6 +13809,14 @@ test_simde_mm256_shuffle_pd(SIMDE_MUNIT_TEST_ARGS) { simde_assert_m256d_close(r, test_vec[i].r2, 1); } + tmp_0_yd = simde_mm256_loadu_pd(HEDLEY_REINTERPRET_CAST(double*, a)); + tmp_1_yd = simde_mm256_loadu_pd(HEDLEY_REINTERPRET_CAST(double*, b)); + r = simde_mm256_shuffle_pd(tmp_0_yd, tmp_1_yd, 0xc); // 0b1100 + + simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_loadu_epi64(target)); + + //simde_test_x86_write_i64x4(2, simde_mm256_castpd_si256(r), SIMDE_TEST_VEC_POS_LAST); + return 0; } From 23ba2cb5baba7aa3beb56006bcf554fbc8b30e35 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Mon, 2 Oct 2023 07:50:57 +0200 Subject: [PATCH 2/2] avx: simde_mm256_shuffle_pd fix for natural vector size <= 128 Closes: #926 Co-authored-by: clin99 <34017491+clin99@users.noreply.github.com> --- simde/x86/avx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simde/x86/avx.h b/simde/x86/avx.h index 4bee0d157..06485253d 100644 --- a/simde/x86/avx.h +++ b/simde/x86/avx.h @@ -5136,8 +5136,8 @@ simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) #define simde_mm256_shuffle_pd(a, b, imm8) \ simde_mm256_set_m128d( \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3)) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm256_shuffle_pd(a, b, imm8) \ SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \