Skip to content

Commit

Permalink
[Feat] Add BF16 when the machine is supported.
Browse files Browse the repository at this point in the history
Finished: vld1_bf16_x4 and vld1q_bf16_x2
  • Loading branch information
yyctw committed Oct 31, 2023
1 parent 55888b2 commit 11a06a4
Show file tree
Hide file tree
Showing 18 changed files with 650 additions and 37 deletions.
4 changes: 2 additions & 2 deletions docker/cross-files/aarch64-clang-15-ccache.cross
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ ld = 'llvm-ld-15'
exe_wrapper = ['qemu-aarch64-static', '-L', '/usr/aarch64-linux-gnu']

[properties]
c_args = ['--target=aarch64-linux-gnu', '-march=armv8-a+simd+crypto+crc', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror']
cpp_args = ['--target=aarch64-linux-gnu', '-march=armv8-a+simd+crypto+crc', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror']
c_args = ['--target=aarch64-linux-gnu', '-march=armv8.2-a+simd+crypto+crc+bf16', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror']
cpp_args = ['--target=aarch64-linux-gnu', '-march=armv8.2-a+simd+crypto+crc+bf16', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-fno-lax-vector-conversions', '-Werror']
c_link_args = ['--target=aarch64-linux-gnu']
cpp_link_args = ['--target=aarch64-linux-gnu']

Expand Down
4 changes: 2 additions & 2 deletions docker/cross-files/aarch64-gcc-12-ccache.cross
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ ld = 'aarch64-linux-gnu-ld'
exe_wrapper = ['qemu-aarch64-static', '-L', '/usr/aarch64-linux-gnu']

[properties]
c_args = ['-march=armv8-a+simd+crypto+crc', '-Wextra', '-Werror']
cpp_args = ['-march=armv8-a+simd+crypto+crc', '-Wextra', '-Werror']
c_args = ['-march=armv8.2-a+simd+crypto+crc+bf16', '-Wextra', '-Werror']
cpp_args = ['-march=armv8.2-a+simd+crypto+crc+bf16', '-Wextra', '-Werror']

[host_machine]
system = 'linux'
Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/ld1.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,11 +546,11 @@ simde_vldrq_p128(simde_poly128_t const ptr[HEDLEY_ARRAY_PARAM(1)]) {

#endif /* !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) */

#if defined(SIMDE_ARM_NEON_BFLOAT16)
#if !defined(SIMDE_NOT_SUPPORT_BFLOAT16)
SIMDE_FUNCTION_ATTRIBUTES
simde_bfloat16x4_t
simde_vld1_bf16(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
return vld1_bf16(ptr);
#else
simde_bfloat16x4_private r_;
Expand All @@ -566,7 +566,7 @@ simde_vld1_bf16(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
SIMDE_FUNCTION_ATTRIBUTES
simde_bfloat16x8_t
simde_vld1q_bf16(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
return vld1q_bf16(ptr);
#else
simde_bfloat16x8_private r_;
Expand All @@ -579,7 +579,7 @@ simde_vld1q_bf16(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#define vld1q_bf16(a) simde_vld1q_bf16((a))
#endif

#endif /* defined(SIMDE_ARM_NEON_BFLOAT16) */
#endif /* !defined(SIMDE_NOT_SUPPORT_BFLOAT16) */

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
Expand Down
9 changes: 3 additions & 6 deletions simde/arm/neon/ld1_x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,14 +387,11 @@ simde_vld1_p64_x4(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#define vld1_p64_x4(a) simde_vld1_p64_x4((a))
#endif

#if defined(SIMDE_ARM_NEON_BFLOAT16)
#if !defined(SIMDE_NOT_SUPPORT_BFLOAT16)
SIMDE_FUNCTION_ATTRIBUTES
simde_bfloat16x4x4_t
simde_vld1_bf16_x4(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if \
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
(!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE)))
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
return vld1_bf16_x4(ptr);
#else
simde_bfloat16x4_private a_[4];
Expand All @@ -413,7 +410,7 @@ simde_vld1_bf16_x4(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#define vld1_bf16_x4(a) simde_vld1_bf16_x4((a))
#endif

#endif /* defined(SIMDE_ARM_NEON_BFLOAT16) */
#endif /* !defined(SIMDE_NOT_SUPPORT_BFLOAT16) */

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/ld1q_x2.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,15 +364,15 @@ simde_vld1q_p64_x2(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#define vld1q_p64_x2(a) simde_vld1q_p64_x2((a))
#endif

#if defined(SIMDE_ARM_NEON_BFLOAT16)
#if !defined(SIMDE_NOT_SUPPORT_BFLOAT16)
SIMDE_FUNCTION_ATTRIBUTES
simde_bfloat16x8x2_t
simde_vld1q_bf16_x2(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if \
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
defined(SIMDE_ARM_NEON_BF16) && (defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
(!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
defined(SIMDE_ARM_NEON_FP16)
defined(SIMDE_ARM_NEON_FP16))
return vld1q_bf16_x2(ptr);
#else
simde_bfloat16x8_private a_[2];
Expand All @@ -389,7 +389,7 @@ simde_vld1q_bf16_x2(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#define vld1q_bf16_x2(a) simde_vld1q_bf16_x2((a))
#endif

#endif /* defined(SIMDE_ARM_NEON_BFLOAT16) */
#endif /* !defined(SIMDE_NOT_SUPPORT_BFLOAT16) */

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

Expand Down
9 changes: 3 additions & 6 deletions simde/arm/neon/ld1q_x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,14 +391,11 @@ simde_vld1q_p64_x4(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#define vld1q_p64_x4(a) simde_vld1q_p64_x4((a))
#endif

#if defined(SIMDE_ARM_NEON_BFLOAT16)
#if !defined(SIMDE_NOT_SUPPORT_BFLOAT16)
SIMDE_FUNCTION_ATTRIBUTES
simde_bfloat16x8x4_t
simde_vld1q_bf16_x4(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if \
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
(!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE)))
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
return vld1q_bf16_x4(ptr);
#else
simde_bfloat16x8_private a_[4];
Expand All @@ -417,7 +414,7 @@ simde_vld1q_bf16_x4(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#define vld1q_bf16_x4(a) simde_vld1q_bf16_x4((a))
#endif

#endif /* defined(SIMDE_ARM_NEON_BFLOAT16) */
#endif /* !defined(SIMDE_NOT_SUPPORT_BFLOAT16) */

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/ld3.h
Original file line number Diff line number Diff line change
Expand Up @@ -826,11 +826,11 @@ simde_vld3q_p64(simde_poly64_t const *ptr) {
#define vld3q_p64(a) simde_vld3q_p64((a))
#endif

#if defined(SIMDE_ARM_NEON_BFLOAT16)
#if !defined(SIMDE_NOT_SUPPORT_BFLOAT16)
SIMDE_FUNCTION_ATTRIBUTES
simde_bfloat16x4x3_t
simde_vld3_bf16(simde_bfloat16 const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
return vld3_bf16(ptr);
#else
simde_bfloat16x4_private r_[3];
Expand Down Expand Up @@ -858,7 +858,7 @@ simde_vld3_bf16(simde_bfloat16 const *ptr) {
SIMDE_FUNCTION_ATTRIBUTES
simde_bfloat16x8x3_t
simde_vld3q_bf16(simde_bfloat16 const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
return vld3q_bf16(ptr);
#else
simde_bfloat16x8_private r_[3];
Expand All @@ -883,7 +883,7 @@ simde_vld3q_bf16(simde_bfloat16 const *ptr) {
#define vld3q_bf16(a) simde_vld3q_bf16((a))
#endif

#endif /* defined(SIMDE_ARM_NEON_BFLOAT16) */
#endif /* !defined(SIMDE_NOT_SUPPORT_BFLOAT16) */

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/st1.h
Original file line number Diff line number Diff line change
Expand Up @@ -525,11 +525,11 @@ simde_vstrq_p128(simde_poly128_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_poly128_t val
#endif
#endif /* !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) */

#if defined(SIMDE_ARM_NEON_BFLOAT16)
#if !defined(SIMDE_NOT_SUPPORT_BFLOAT16)
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_bfloat16x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
vst1_bf16(ptr, val);
#else
simde_bfloat16x4_private val_ = simde_bfloat16x4_to_private(val);
Expand All @@ -544,7 +544,7 @@ simde_vst1_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_bfloat16x4_t
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1q_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_bfloat16x8_t val) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
vst1q_bf16(ptr, val);
#else
simde_bfloat16x8_private val_ = simde_bfloat16x8_to_private(val);
Expand All @@ -556,7 +556,7 @@ simde_vst1q_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_bfloat16x8_t
#define vst1q_bf16(a, b) simde_vst1q_bf16((a), (b))
#endif

#endif /* defined(SIMDE_ARM_NEON_BFLOAT16) */
#endif /* !defined(SIMDE_NOT_SUPPORT_BFLOAT16) */

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/st3.h
Original file line number Diff line number Diff line change
Expand Up @@ -937,11 +937,11 @@ simde_vst3q_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_poly64x2x3_t va
#define vst3q_p64(a, b) simde_vst3q_p64((a), (b))
#endif

#if defined(SIMDE_ARM_NEON_BFLOAT16)
#if !defined(SIMDE_NOT_SUPPORT_BFLOAT16)
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst3_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_bfloat16x4x3_t val) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
vst3_bf16(ptr, val);
#else
simde_bfloat16x4_private a[3] = { simde_bfloat16x4_to_private(val.val[0]),
Expand All @@ -962,7 +962,7 @@ simde_vst3_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_bfloat16x4x3
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst3q_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_bfloat16x8x3_t val) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)
vst3q_bf16(ptr, val);
#else
simde_bfloat16x8_private a_[3] = { simde_bfloat16x8_to_private(val.val[0]),
Expand All @@ -980,7 +980,7 @@ simde_vst3q_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_bfloat16x8x
#define vst3q_bf16(a, b) simde_vst3q_bf16((a), (b))
#endif

#endif /* defined(SIMDE_ARM_NEON_BFLOAT16) */
#endif /* !defined(SIMDE_NOT_SUPPORT_BFLOAT16) */

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

Expand Down
58 changes: 58 additions & 0 deletions simde/arm/neon/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,14 @@ typedef union {
SIMDE_ARM_NEON_DECLARE_VECTOR(simde_poly64, values, 16);
} simde_poly64x2_private;

/* Private union behind the bfloat16x4 vector type. Its storage is declared
 * through SIMDE_ARM_NEON_DECLARE_VECTOR with element type simde_bfloat16 and
 * member name `values`.
 * NOTE(review): the last macro argument (8) is presumably the vector size in
 * bytes (4 lanes x 2 bytes) -- confirm against the macro definition. */
typedef union {
SIMDE_ARM_NEON_DECLARE_VECTOR(simde_bfloat16, values, 8);
} simde_bfloat16x4_private;

/* Private union behind the bfloat16x8 vector type. Its storage is declared
 * through SIMDE_ARM_NEON_DECLARE_VECTOR with element type simde_bfloat16 and
 * member name `values`.
 * NOTE(review): the last macro argument (16) is presumably the vector size in
 * bytes (8 lanes x 2 bytes) -- confirm against the macro definition. */
typedef union {
SIMDE_ARM_NEON_DECLARE_VECTOR(simde_bfloat16, values, 16);
} simde_bfloat16x8_private;

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
typedef float32_t simde_float32_t;
typedef poly8_t simde_poly8_t;
Expand Down Expand Up @@ -456,6 +464,20 @@ typedef union {
#define SIMDE_ARM_NEON_NEED_PORTABLE_F16
#endif

/* BF16 type selection.
 * When SIMDE_ARM_NEON_BF16 is defined, every simde_bfloat16* name is a plain
 * alias of the matching bfloat16* type (presumably the native types from
 * <arm_neon.h> -- confirm). Otherwise SIMDE_ARM_NEON_NEED_PORTABLE_BF16 is
 * defined so that code guarded by that macro supplies the types instead. */
#if defined(SIMDE_ARM_NEON_BF16)
typedef bfloat16_t simde_bfloat16_t;
typedef bfloat16x4_t simde_bfloat16x4_t;
typedef bfloat16x4x2_t simde_bfloat16x4x2_t;
typedef bfloat16x4x3_t simde_bfloat16x4x3_t;
typedef bfloat16x4x4_t simde_bfloat16x4x4_t;
typedef bfloat16x8_t simde_bfloat16x8_t;
typedef bfloat16x8x2_t simde_bfloat16x8x2_t;
typedef bfloat16x8x3_t simde_bfloat16x8x3_t;
typedef bfloat16x8x4_t simde_bfloat16x8x4_t;
#else
#define SIMDE_ARM_NEON_NEED_PORTABLE_BF16
#endif

#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
typedef poly64_t simde_poly64_t;
typedef poly64x1_t simde_poly64x1_t;
Expand Down Expand Up @@ -501,6 +523,7 @@ typedef union {
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN
#define SIMDE_ARM_NEON_NEED_PORTABLE_BF16

#define SIMDE_ARM_NEON_NEED_PORTABLE_VXN
#define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN
Expand Down Expand Up @@ -564,6 +587,7 @@ typedef union {
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN
#define SIMDE_ARM_NEON_NEED_PORTABLE_BF16

#define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT

Expand All @@ -590,6 +614,7 @@ typedef union {
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN
#define SIMDE_ARM_NEON_NEED_PORTABLE_BF16

#define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN
Expand Down Expand Up @@ -667,6 +692,7 @@ typedef union {
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN
#define SIMDE_ARM_NEON_NEED_PORTABLE_BF16
#define SIMDE_ARM_NEON_NEED_PORTABLE_VXN
#define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN
#define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN
Expand All @@ -675,6 +701,7 @@ typedef union {
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT
#define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN
#define SIMDE_ARM_NEON_NEED_PORTABLE_BF16
#define SIMDE_ARM_NEON_NEED_PORTABLE_F16
#define SIMDE_ARM_NEON_NEED_PORTABLE_F32
#define SIMDE_ARM_NEON_NEED_PORTABLE_F64
Expand Down Expand Up @@ -765,6 +792,35 @@ typedef union {
} simde_poly16x8x4_t;
#endif

/* Portable BF16 types, compiled only when SIMDE_ARM_NEON_NEED_PORTABLE_BF16
 * is defined. */
#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_BF16)
/* Scalar and single-vector types alias simde_bfloat16 and the *_private
 * unions directly. */
typedef simde_bfloat16 simde_bfloat16_t;
typedef simde_bfloat16x4_private simde_bfloat16x4_t;
typedef simde_bfloat16x8_private simde_bfloat16x8_t;
/* Multi-vector (xN) types: each struct wraps a `val` array of N vectors. */
typedef struct simde_bfloat16x4x2_t {
simde_bfloat16x4_t val[2];
} simde_bfloat16x4x2_t;

typedef struct simde_bfloat16x8x2_t {
simde_bfloat16x8_t val[2];
} simde_bfloat16x8x2_t;

typedef struct simde_bfloat16x4x3_t {
simde_bfloat16x4_t val[3];
} simde_bfloat16x4x3_t;

typedef struct simde_bfloat16x8x3_t {
simde_bfloat16x8_t val[3];
} simde_bfloat16x8x3_t;

typedef struct simde_bfloat16x4x4_t {
simde_bfloat16x4_t val[4];
} simde_bfloat16x4x4_t;

typedef struct simde_bfloat16x8x4_t {
simde_bfloat16x8_t val[4];
} simde_bfloat16x8x4_t;
#endif

#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I8X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT)
typedef simde_int8x8_private simde_int8x8_t;
#endif
Expand Down Expand Up @@ -1271,6 +1327,7 @@ SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x1)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly8x8)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly16x4)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly64x1)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(bfloat16x4)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int8x16)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int16x8)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int32x4)
Expand All @@ -1285,6 +1342,7 @@ SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly64x2)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float16x8)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float32x4)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x2)
SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(bfloat16x8)

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
Expand Down
5 changes: 5 additions & 0 deletions simde/simde-arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,11 @@
# define SIMDE_ARCH_ARM_NEON_FP16
#endif

/* Availability of 16-bit brain floating-point (BF16) arithmetic intrinsics.
 * SIMDE_ARCH_ARM_NEON_BF16 is defined exactly when the compiler predefines
 * __ARM_FEATURE_BF16_VECTOR_ARITHMETIC. */
#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
# define SIMDE_ARCH_ARM_NEON_BF16
#endif

/* LoongArch
<https://en.wikipedia.org/wiki/Loongson#LoongArch> */
#if defined(__loongarch32)
Expand Down
Loading

0 comments on commit 11a06a4

Please sign in to comment.