From 942841b2047cafb287536ce378f0fc6d29d794bf Mon Sep 17 00:00:00 2001 From: Alex K Date: Tue, 16 Jul 2024 11:37:33 -0400 Subject: [PATCH] Use 128 bit fnmadd_pd to do 256 bit fnmadd_pd --- simde/x86/fma.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/simde/x86/fma.h b/simde/x86/fma.h index 630efc54a..1a1104f65 100644 --- a/simde/x86/fma.h +++ b/simde/x86/fma.h @@ -464,11 +464,16 @@ simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { b_ = simde__m256d_to_private(b), c_ = simde__m256d_to_private(c); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; - } - + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_fnmadd_pd(a_.m128d[i], b_.m128d[i], c_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif return simde__m256d_from_private(r_); #endif }