Skip to content

Commit

Permalink
[Fix] Add conditions for fp16 intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
yyctw authored and mr-c committed Oct 13, 2023
1 parent b34c8d2 commit dbe0d24
Show file tree
Hide file tree
Showing 76 changed files with 1,170 additions and 864 deletions.
4 changes: 4 additions & 0 deletions simde/arm/neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include "neon/types.h"

#include "neon/aba.h"
#include "neon/abal.h"
#include "neon/abal_high.h"
#include "neon/abd.h"
#include "neon/abdl.h"
#include "neon/abs.h"
Expand All @@ -48,6 +50,8 @@
#include "neon/bsl.h"
#include "neon/cage.h"
#include "neon/cagt.h"
#include "neon/cale.h"
#include "neon/calt.h"
#include "neon/ceq.h"
#include "neon/ceqz.h"
#include "neon/cge.h"
Expand Down
2 changes: 1 addition & 1 deletion simde/arm/neon/create.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ simde_vcreate_u64(uint64_t a) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x4_t
simde_vcreate_f16(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcreate_f16(a);
#else
return simde_vreinterpret_f16_u64(simde_vdup_n_u64(a));
Expand Down
66 changes: 21 additions & 45 deletions simde/arm/neon/cvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@ simde_vcvt_f64_f32(simde_float32x2_t a) {

SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_x_vcvts_s16_f16(simde_float16 a) {
#if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_ARM_NEON_FP16)
return HEDLEY_STATIC_CAST(int16_t, a);
simde_vcvth_s16_f16(simde_float16 a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcvth_s16_f16(a);
#else
simde_float32 af = simde_float16_to_float32(a);
if (HEDLEY_UNLIKELY(af < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) {
Expand All @@ -157,12 +157,16 @@ simde_x_vcvts_s16_f16(simde_float16 a) {
}
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcvth_s16_f16
#define vcvth_s16_f16(a) simde_vcvth_s16_f16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_x_vcvts_u16_f16(simde_float16 a) {
#if defined(SIMDE_FAST_CONVERSION_RANGE)
return HEDLEY_STATIC_CAST(uint16_t, simde_float16_to_float32(a));
simde_vcvth_u16_f16(simde_float16 a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcvth_u16_f16(a);
#else
simde_float32 af = simde_float16_to_float32(a);
if (HEDLEY_UNLIKELY(af < SIMDE_FLOAT32_C(0.0))) {
Expand All @@ -176,6 +180,10 @@ simde_x_vcvts_u16_f16(simde_float16 a) {
}
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcvth_u16_f16
#define vcvth_u16_f16(a) simde_vcvth_u16_f16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int32_t
Expand Down Expand Up @@ -266,7 +274,7 @@ simde_vcvtd_s64_f64(simde_float64 a) {
return INT64_MIN;
} else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) {
return INT64_MAX;
} else if (simde_math_isnanf(a)) {
} else if (simde_math_isnan(a)) {
return 0;
} else {
return HEDLEY_STATIC_CAST(int64_t, a);
Expand Down Expand Up @@ -330,38 +338,6 @@ simde_vcvtd_f64_u64(uint64_t a) {
#define vcvtd_f64_u64(a) simde_vcvtd_f64_u64(a)
#endif

/* Convert a signed 16-bit integer to a half-precision float (scalar form).
 *
 * a: the int16_t value to convert.
 * Returns the value as simde_float16_t.
 *
 * Dispatch order:
 *   1. the native vcvth_f16_s16 intrinsic, when both
 *      SIMDE_ARM_NEON_A64V8_NATIVE and SIMDE_ARM_NEON_FP16 are defined;
 *   2. in the PORTABLE and FP16_NO_ABI float16 API modes, widen to
 *      float32 and convert with simde_float16_from_float32;
 *   3. otherwise, a direct static cast to simde_float16_t. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float16_t
simde_vcvth_f16_s16(int16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vcvth_f16_s16(a);
  #elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE || SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI
    simde_float32_t widened = HEDLEY_STATIC_CAST(simde_float32_t, a);
    return simde_float16_from_float32(widened);
  #else
    return HEDLEY_STATIC_CAST(simde_float16_t, a);
  #endif
}
/* When native aliases are requested, route the unprefixed intrinsic name
 * to the simde_ implementation above. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcvth_f16_s16
  #define vcvth_f16_s16(a) simde_vcvth_f16_s16(a)
#endif

/* Convert an unsigned 16-bit integer to a half-precision float (scalar form).
 *
 * a: the uint16_t value to convert.
 * Returns the value as simde_float16_t.
 *
 * Dispatch order:
 *   1. the native vcvth_f16_u16 intrinsic, when both
 *      SIMDE_ARM_NEON_A64V8_NATIVE and SIMDE_ARM_NEON_FP16 are defined;
 *   2. in the PORTABLE and FP16_NO_ABI float16 API modes, widen to
 *      float32 and convert with simde_float16_from_float32;
 *   3. otherwise, a direct static cast to simde_float16_t. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float16_t
simde_vcvth_f16_u16(uint16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vcvth_f16_u16(a);
  #elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE || SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI
    simde_float32_t widened = HEDLEY_STATIC_CAST(simde_float32_t, a);
    return simde_float16_from_float32(widened);
  #else
    return HEDLEY_STATIC_CAST(simde_float16_t, a);
  #endif
}
/* When native aliases are requested, route the unprefixed intrinsic name
 * to the simde_ implementation above. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcvth_f16_u16
  #define vcvth_f16_u16(a) simde_vcvth_f16_u16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vcvt_s16_f16(simde_float16x4_t a) {
Expand All @@ -376,7 +352,7 @@ simde_vcvt_s16_f16(simde_float16x4_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]);
r_.values[i] = simde_vcvth_s16_f16(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -428,7 +404,7 @@ simde_vcvt_u16_f16(simde_float16x4_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]);
r_.values[i] = simde_vcvth_u16_f16(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -533,7 +509,7 @@ simde_vcvtq_s16_f16(simde_float16x8_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]);
r_.values[i] = simde_vcvth_s16_f16(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -637,7 +613,7 @@ simde_vcvtq_u16_f16(simde_float16x8_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]);
r_.values[i] = simde_vcvth_u16_f16(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -1214,7 +1190,7 @@ simde_vcvtas_s32_f32(simde_float32 a) {
} else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) {
return 0;
} else {
return HEDLEY_STATIC_CAST(int32_t, simde_math_round(a));
return HEDLEY_STATIC_CAST(int32_t, simde_math_roundf(a));
}
#endif
}
Expand All @@ -1237,7 +1213,7 @@ simde_vcvtas_u32_f32(simde_float32 a) {
return 0;
} else {
if(a < 0) return 0;
return HEDLEY_STATIC_CAST(int32_t, simde_math_round(a));
return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundf(a));
}
#endif
}
Expand Down
Loading

0 comments on commit dbe0d24

Please sign in to comment.