Skip to content

Commit

Permalink
AVX functions renamed because of conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
Geolm committed Jan 19, 2024
1 parent fed0417 commit ab55538
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 38 deletions.
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,34 @@ It's one-header lib, just define the macro once and include the header

```C
// max error : 5.960464478e-08
__m256 _mm256_cos_ps(__m256 a);
__m256 mm256_cos_ps(__m256 a);

// max error : 5.960464478e-08
__m256 _mm256_sin_ps(__m256 a);
__m256 mm256_sin_ps(__m256 a);

// max error : 5.960464478e-08
void _mm256_sincos_ps(__m256 a, __m256 *s, __m256 *c);
void mm256_sincos_ps(__m256 a, __m256 *s, __m256 *c);

// max error : 2.384185791e-07
__m256 _mm256_acos_ps(__m256 a);
__m256 mm256_acos_ps(__m256 a);

// max error : 1.192092896e-07
__m256 _mm256_asin_ps(__m256 a);
__m256 mm256_asin_ps(__m256 a);

// max error : 6.699562073e-05
__m256 _mm256_atan_ps(__m256 a);
__m256 mm256_atan_ps(__m256 a);

// max error : 2.384185791e-07
__m256 _mm256_atan2_ps(__m256 x, __m256 y);
__m256 mm256_atan2_ps(__m256 x, __m256 y);

// max error : 4.768371582e-07
__m256 _mm256_log_ps(__m256 a);
__m256 mm256_log_ps(__m256 a);

// max error : 1.108270880e-07
__m256 _mm256_exp_ps(__m256 a);
__m256 mm256_exp_ps(__m256 a);

// max error : 4.768371582e-07
__m256 _mm256_cbrt_ps(__m256 a);
__m256 mm256_cbrt_ps(__m256 a);
```
Note : the same functions are defined in NEON intrinsics style :
Expand Down
46 changes: 23 additions & 23 deletions math_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,34 +52,34 @@ extern "C" {
#include <immintrin.h>

// max error : 5.960464478e-08
__m256 _mm256_cos_ps(__m256 a);
__m256 mm256_cos_ps(__m256 a);

// max error : 5.960464478e-08
__m256 _mm256_sin_ps(__m256 a);
__m256 mm256_sin_ps(__m256 a);

// max error : 5.960464478e-08
void _mm256_sincos_ps(__m256 a, __m256 *s, __m256 *c);
void mm256_sincos_ps(__m256 a, __m256 *s, __m256 *c);

// max error : 2.384185791e-07
__m256 _mm256_acos_ps(__m256 a);
__m256 mm256_acos_ps(__m256 a);

// max error : 1.192092896e-07
__m256 _mm256_asin_ps(__m256 a);
__m256 mm256_asin_ps(__m256 a);

// max error : 6.699562073e-05
__m256 _mm256_atan_ps(__m256 a);
__m256 mm256_atan_ps(__m256 a);

// max error : 2.384185791e-07
__m256 _mm256_atan2_ps(__m256 x, __m256 y);
__m256 mm256_atan2_ps(__m256 x, __m256 y);

// max error : 4.768371582e-07
__m256 _mm256_log_ps(__m256 a);
__m256 mm256_log_ps(__m256 a);

// max error : 1.108270880e-07
__m256 _mm256_exp_ps(__m256 a);
__m256 mm256_exp_ps(__m256 a);

// max error : 4.768371582e-07
__m256 _mm256_cbrt_ps(__m256 a);
__m256 mm256_cbrt_ps(__m256 a);

#define __MATH__INTRINSICS__AVX__

Expand Down Expand Up @@ -211,9 +211,9 @@ extern "C" {
// Integer equality compare for the AVX backend: forwards both operands
// unchanged to _mm256_cmpeq_epi32 (packed 32-bit lanes) and returns its result.
static inline simd_vectori simd_cmp_eq_i(simd_vectori a, simd_vectori b)
{
    return _mm256_cmpeq_epi32(a, b);
}
// Integer greater-than compare (a > b) for the AVX backend: forwards both
// operands unchanged to _mm256_cmpgt_epi32 (packed 32-bit lanes) and returns
// its result.
static inline simd_vectori simd_cmp_gt_i(simd_vectori a, simd_vectori b)
{
    return _mm256_cmpgt_epi32(a, b);
}

#define simd_asin _mm256_asin_ps
#define simd_atan _mm256_atan_ps
#define simd_sincos _mm256_sincos_ps
#define simd_asin mm256_asin_ps
#define simd_atan mm256_atan_ps
#define simd_sincos mm256_sincos_ps

#endif

Expand Down Expand Up @@ -255,7 +255,7 @@ static inline simd_vector simd_sign(simd_vector a)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vatanq_f32(float32x4_t xx)
#else
__m256 _mm256_atan_ps(__m256 xx)
__m256 mm256_atan_ps(__m256 xx)
#endif
{
simd_vector sign = simd_sign(xx);
Expand Down Expand Up @@ -289,7 +289,7 @@ static inline simd_vector simd_sign(simd_vector a)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vatan2q_f32(float32x4_t x, float32x4_t y)
#else
__m256 _mm256_atan2_ps(__m256 x, __m256 y)
__m256 mm256_atan2_ps(__m256 x, __m256 y)
#endif
{
simd_vector swap = simd_cmp_lt(simd_abs(x), simd_abs(y));
Expand All @@ -310,7 +310,7 @@ static inline simd_vector simd_sign(simd_vector a)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vlogq_f32(float32x4_t x)
#else
__m256 _mm256_log_ps(__m256 x)
__m256 mm256_log_ps(__m256 x)
#endif
{
simd_vector one = simd_splat(1.f);
Expand Down Expand Up @@ -364,7 +364,7 @@ static inline simd_vector simd_sign(simd_vector a)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vexpq_f32(float32x4_t x)
#else
__m256 _mm256_exp_ps(__m256 x)
__m256 mm256_exp_ps(__m256 x)
#endif
{
simd_vector tmp = simd_splat_zero();
Expand Down Expand Up @@ -411,7 +411,7 @@ static inline simd_vector simd_sign(simd_vector a)
#ifdef __MATH__INTRINSICS__NEON__
void vsincosq_f32(float32x4_t x, float32x4_t* s, float32x4_t* c)
#else
void _mm256_sincos_ps(__m256 x, __m256* s, __m256* c)
void mm256_sincos_ps(__m256 x, __m256* s, __m256* c)
#endif
{
simd_vector xmm1, xmm2, xmm3 = simd_splat_zero(), sign_bit_sin, y;
Expand Down Expand Up @@ -497,7 +497,7 @@ static inline simd_vector simd_sign(simd_vector a)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vsinq_f32(float32x4_t x)
#else
__m256 _mm256_sin_ps(__m256 x)
__m256 mm256_sin_ps(__m256 x)
#endif
{
simd_vector sinus, cosinus;
Expand All @@ -509,7 +509,7 @@ __m256 _mm256_sin_ps(__m256 x)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vcosq_f32(float32x4_t x)
#else
__m256 _mm256_cos_ps(__m256 x)
__m256 mm256_cos_ps(__m256 x)
#endif
{
simd_vector sinus, cosinus;
Expand All @@ -522,7 +522,7 @@ __m256 _mm256_cos_ps(__m256 x)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vasinq_f32(float32x4_t xx)
#else
__m256 _mm256_asin_ps(__m256 xx)
__m256 mm256_asin_ps(__m256 xx)
#endif
{
simd_vector x = xx;
Expand Down Expand Up @@ -560,7 +560,7 @@ __m256 _mm256_cos_ps(__m256 x)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vacosq_f32(float32x4_t x)
#else
__m256 _mm256_acos_ps(__m256 x)
__m256 mm256_acos_ps(__m256 x)
#endif
{
simd_vector out_of_bound = simd_cmp_gt(simd_abs(x), simd_splat(1.f));
Expand All @@ -574,7 +574,7 @@ __m256 _mm256_cos_ps(__m256 x)
#ifdef __MATH__INTRINSICS__NEON__
float32x4_t vcbrtq_f32(float32x4_t xx)
#else
__m256 _mm256_cbrt_ps(__m256 xx)
__m256 mm256_cbrt_ps(__m256 xx)
#endif
{
simd_vector one_over_three = simd_splat(0.333333333333f);
Expand Down
10 changes: 5 additions & 5 deletions tests/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ SUITE(trigonometry)
printf(".");

#ifdef __MATH__INTRINSICS__AVX__
RUN_TESTp(generic_test, sinf, _mm256_sin_ps, -10.f, 10.f, FLT_EPSILON, 1024, false, "_mm256_sin_ps");
RUN_TESTp(generic_test, cosf, _mm256_cos_ps, -10.f, 10.f, FLT_EPSILON, 1024, false, "_mm256_cos_ps");
RUN_TESTp(generic_test, acosf, _mm256_acos_ps, -1.f, 1.f, 1.e-06f, 1024, false, "_mm256_acos_ps");
RUN_TESTp(generic_test, asinf, _mm256_asin_ps, -1.f, 1.f, 1.e-06f, 1024, false, "_mm256_asin_ps");
RUN_TESTp(generic_test, atanf, _mm256_atan_ps, -10.f, 10.f, 1.e-04f, 1024, false, "_mm256_atan_ps");
RUN_TESTp(generic_test, sinf, mm256_sin_ps, -10.f, 10.f, FLT_EPSILON, 1024, false, "mm256_sin_ps");
RUN_TESTp(generic_test, cosf, mm256_cos_ps, -10.f, 10.f, FLT_EPSILON, 1024, false, "mm256_cos_ps");
RUN_TESTp(generic_test, acosf, mm256_acos_ps, -1.f, 1.f, 1.e-06f, 1024, false, "mm256_acos_ps");
RUN_TESTp(generic_test, asinf, mm256_asin_ps, -1.f, 1.f, 1.e-06f, 1024, false, "mm256_asin_ps");
RUN_TESTp(generic_test, atanf, mm256_atan_ps, -10.f, 10.f, 1.e-04f, 1024, false, "mm256_atan_ps");
//RUN_TESTp(generic_test2, atan2_xy, simd_atan2, 1.e-06f, 1024, false, "simd_atan2");
#else
RUN_TESTp(generic_test, sinf, vsinq_f32, -10.f, 10.f, FLT_EPSILON, 1024, false, "vsinq_f32");
Expand Down

0 comments on commit ab55538

Please sign in to comment.