Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVX512F/fmaddsub: Implement Instruction #1246

Merged
merged 5 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ simde_avx512_families = [
'fixupimm_round',
'flushsubnormal',
'fmadd',
'fmaddsub',
'fmsub',
'fnmadd',
'fnmsub',
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
#include "avx512/fixupimm_round.h"
#include "avx512/flushsubnormal.h"
#include "avx512/fmadd.h"
#include "avx512/fmaddsub.h"
#include "avx512/fmsub.h"
#include "avx512/fnmadd.h"
#include "avx512/fnmsub.h"
Expand Down
91 changes: 91 additions & 0 deletions simde/x86/avx512/fmaddsub.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#if !defined(SIMDE_X86_AVX512_FMADDSUB_H)
#define SIMDE_X86_AVX512_FMADDSUB_H

#include "types.h"
#include "../fma.h"
#include "mul.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_x_mm512_addsub_pd (simde__m512d a, simde__m512d b) {
  /* Utility helper only: there is no _mm512_addsub_pd intrinsic, so this
   * function carries the simde_x prefix and has no native alias.
   *
   * In the portable path below, each even-indexed double lane holds
   * a - b and each odd-indexed double lane holds a + b. */
  simde__m512d_private
    lhs_ = simde__m512d_to_private(a),
    rhs_ = simde__m512d_to_private(b),
    result_;

  #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
    /* Natural vector width is at most 256 bits: process the two 256-bit
     * halves with the 256-bit addsub helper. */
    result_.m256d[0] = simde_mm256_addsub_pd(lhs_.m256d[0], rhs_.m256d[0]);
    result_.m256d[1] = simde_mm256_addsub_pd(lhs_.m256d[1], rhs_.m256d[1]);
  #else
    /* Walk the lanes as (even, odd) pairs. */
    SIMDE_VECTORIZE
    for (size_t pair = 0 ; pair < ((sizeof(result_.f64) / sizeof(result_.f64[0])) / 2) ; pair++) {
      const size_t even = pair * 2;
      const size_t odd = even + 1;
      result_.f64[even] = lhs_.f64[even] - rhs_.f64[even];
      result_.f64[odd] = lhs_.f64[odd] + rhs_.f64[odd];
    }
  #endif

  return simde__m512d_from_private(result_);
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_x_mm512_addsub_ps (simde__m512 a, simde__m512 b) {
  /* Utility helper only: there is no _mm512_addsub_ps intrinsic, so this
   * function carries the simde_x prefix and has no native alias.
   *
   * In the portable path below, each even-indexed float lane holds
   * a - b and each odd-indexed float lane holds a + b. */
  simde__m512_private
    lhs_ = simde__m512_to_private(a),
    rhs_ = simde__m512_to_private(b),
    result_;

  #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
    /* Natural vector width is at most 256 bits: process the two 256-bit
     * halves with the 256-bit addsub helper. */
    result_.m256[0] = simde_mm256_addsub_ps(lhs_.m256[0], rhs_.m256[0]);
    result_.m256[1] = simde_mm256_addsub_ps(lhs_.m256[1], rhs_.m256[1]);
  #else
    /* Walk the lanes as (even, odd) pairs. */
    SIMDE_VECTORIZE
    for (size_t pair = 0 ; pair < ((sizeof(result_.f32) / sizeof(result_.f32[0])) / 2) ; pair++) {
      const size_t even = pair * 2;
      const size_t odd = even + 1;
      result_.f32[even] = lhs_.f32[even] - rhs_.f32[even];
      result_.f32[odd] = lhs_.f32[odd] + rhs_.f32[odd];
    }
  #endif

  return simde__m512_from_private(result_);
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_fmaddsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) {
  /* Portable counterpart of _mm512_fmaddsub_pd.
   *
   * With native AVX-512F the call is forwarded to the intrinsic.
   * Otherwise the product a * b is formed first with simde_mm512_mul_pd
   * and then combined with c by simde_x_mm512_addsub_pd, i.e. the
   * fallback is a separate multiply followed by an add/sub step. */
  #if !defined(SIMDE_X86_AVX512F_NATIVE)
    const simde__m512d product = simde_mm512_mul_pd(a, b);
    return simde_x_mm512_addsub_pd(product, c);
  #else
    return _mm512_fmaddsub_pd(a, b, c);
  #endif
}
/* When native aliases are requested, route the Intel name to SIMDe. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_fmaddsub_pd
  #define _mm512_fmaddsub_pd(a, b, c) simde_mm512_fmaddsub_pd(a, b, c)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_fmaddsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) {
  /* Portable counterpart of _mm512_fmaddsub_ps.
   *
   * With native AVX-512F the call is forwarded to the intrinsic.
   * Otherwise the product a * b is formed first with simde_mm512_mul_ps
   * and then combined with c by simde_x_mm512_addsub_ps, i.e. the
   * fallback is a separate multiply followed by an add/sub step. */
  #if !defined(SIMDE_X86_AVX512F_NATIVE)
    const simde__m512 product = simde_mm512_mul_ps(a, b);
    return simde_x_mm512_addsub_ps(product, c);
  #else
    return _mm512_fmaddsub_ps(a, b, c);
  #endif
}
/* When native aliases are requested, route the Intel name to SIMDe. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_fmaddsub_ps
  #define _mm512_fmaddsub_ps(a, b, c) simde_mm512_fmaddsub_ps(a, b, c)
#endif


SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_AVX512_FMADDSUB_H) */
Loading
Loading