diff --git a/doc/graphene-sections.txt b/doc/graphene-sections.txt
index a38cd87..bb85ba6 100644
--- a/doc/graphene-sections.txt
+++ b/doc/graphene-sections.txt
@@ -439,6 +439,8 @@ graphene_simd4f_is_zero4
 graphene_simd4f_is_zero3
 graphene_simd4f_is_zero2
 graphene_simd4f_interpolate
+graphene_simd4f_ceil
+graphene_simd4f_floor
 
 graphene_simd4f_union_t
 graphene_simd4i_union_t
diff --git a/include/graphene-simd4f.h b/include/graphene-simd4f.h
index ea39062..758343d 100644
--- a/include/graphene-simd4f.h
+++ b/include/graphene-simd4f.h
@@ -174,6 +174,11 @@ bool graphene_simd4f_cmp_gt (const graphene_simd4f_t
 GRAPHENE_AVAILABLE_IN_1_0
 graphene_simd4f_t graphene_simd4f_neg (const graphene_simd4f_t s);
 
+GRAPHENE_AVAILABLE_IN_1_12
+graphene_simd4f_t graphene_simd4f_ceil (const graphene_simd4f_t s);
+GRAPHENE_AVAILABLE_IN_1_12
+graphene_simd4f_t graphene_simd4f_floor (const graphene_simd4f_t s);
+
 #if !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_SSE)
 
 /* SSE2 implementation of SIMD 4f */
@@ -471,6 +476,34 @@ typedef GRAPHENE_ALIGN16 union {
     (graphene_simd4f_t) _mm_xor_ps ((s), _mm_load_ps (__mask.f)); \
   }))
 
+# if defined(GRAPHENE_USE_SSE4_1)
+# define graphene_simd4f_ceil(s) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_ceil_ps ((s)); \
+  }))
+# define graphene_simd4f_floor(s) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_floor_ps ((s)); \
+  }))
+# else /* no SSE4.1: round each component with ceilf()/floorf() */
+# define graphene_simd4f_ceil(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __ceil_s = (s); /* expand (and evaluate) the argument only once */ \
+    (graphene_simd4f_t) graphene_simd4f_init (ceilf (graphene_simd4f_get_x (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_y (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_z (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_w (__ceil_s))); \
+  }))
+# define graphene_simd4f_floor(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __floor_s = (s); /* expand (and evaluate) the argument only once */ \
+    (graphene_simd4f_t) graphene_simd4f_init (floorf (graphene_simd4f_get_x (__floor_s)), \
+                                              floorf (graphene_simd4f_get_y (__floor_s)), \
+                                              floorf (graphene_simd4f_get_z (__floor_s)), \
+                                              floorf (graphene_simd4f_get_w (__floor_s))); \
+  }))
+# endif
+
 /* On MSVC, we use static inlines */
 # elif defined (_MSC_VER) /* Visual Studio SSE intrinsics */
 
@@ -771,6 +804,37 @@ _simd4f_neg (const graphene_simd4f_t s)
   return _mm_xor_ps (s, _mm_load_ps (__mask.f));
 }
 
+#define graphene_simd4f_ceil(s) _simd4f_ceil(s)
+#define graphene_simd4f_floor(s) _simd4f_floor(s)
+
+static inline graphene_simd4f_t
+_simd4f_ceil (const graphene_simd4f_t s)
+{
+#if defined(GRAPHENE_USE_SSE4_1)
+  return _mm_ceil_ps (s);
+#else /* no SSE4.1: round each component with ceilf() */
+  const float __ceil_x = ceilf (graphene_simd4f_get_x (s));
+  const float __ceil_y = ceilf (graphene_simd4f_get_y (s));
+  const float __ceil_z = ceilf (graphene_simd4f_get_z (s));
+  const float __ceil_w = ceilf (graphene_simd4f_get_w (s));
+  return graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w);
+#endif
+}
+
+static inline graphene_simd4f_t
+_simd4f_floor (const graphene_simd4f_t s)
+{
+#if defined(GRAPHENE_USE_SSE4_1)
+  return _mm_floor_ps (s);
+#else /* no SSE4.1: round each component with floorf() */
+  const float __floor_x = floorf (graphene_simd4f_get_x (s));
+  const float __floor_y = floorf (graphene_simd4f_get_y (s));
+  const float __floor_z = floorf (graphene_simd4f_get_z (s));
+  const float __floor_w = floorf (graphene_simd4f_get_w (s));
+  return graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w);
+#endif
+}
+
 #else /* SSE intrinsics-not GCC or Visual Studio */
 
 # error "Need GCC-compatible or Visual Studio compiler for SSE extensions."
@@ -1076,6 +1140,24 @@ typedef int graphene_simd4i_t __attribute__((vector_size (16)));
     graphene_simd4f_mul (__s, __minus_one); \
   }))
 
+# define graphene_simd4f_ceil(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __ceil_s = (s); /* expand (and evaluate) the argument only once */ \
+    (graphene_simd4f_t) graphene_simd4f_init (ceilf (graphene_simd4f_get_x (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_y (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_z (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_w (__ceil_s))); \
+  }))
+
+# define graphene_simd4f_floor(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __floor_s = (s); /* expand (and evaluate) the argument only once */ \
+    (graphene_simd4f_t) graphene_simd4f_init (floorf (graphene_simd4f_get_x (__floor_s)), \
+                                              floorf (graphene_simd4f_get_y (__floor_s)), \
+                                              floorf (graphene_simd4f_get_z (__floor_s)), \
+                                              floorf (graphene_simd4f_get_w (__floor_s))); \
+  }))
+
 #elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_ARM_NEON)
 
 /* ARM Neon implementation of SIMD4f */
@@ -1398,6 +1480,24 @@ typedef float32x2_t graphene_simd2f_t;
     (graphene_simd4f_t) vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask)); \
   }))
 
+# define graphene_simd4f_ceil(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __ceil_s = (s); /* expand (and evaluate) the argument only once */ \
+    (graphene_simd4f_t) graphene_simd4f_init (ceilf (graphene_simd4f_get_x (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_y (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_z (__ceil_s)), \
+                                              ceilf (graphene_simd4f_get_w (__ceil_s))); \
+  }))
+
+# define graphene_simd4f_floor(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __floor_s = (s); /* expand (and evaluate) the argument only once */ \
+    (graphene_simd4f_t) graphene_simd4f_init (floorf (graphene_simd4f_get_x (__floor_s)), \
+                                              floorf (graphene_simd4f_get_y (__floor_s)), \
+                                              floorf (graphene_simd4f_get_z (__floor_s)), \
+                                              floorf (graphene_simd4f_get_w (__floor_s))); \
+  }))
+
 #elif defined _MSC_VER /* Visual Studio ARM */
 
 # define graphene_simd4f_init(x,y,z,w) _simd4f_init(x,y,z,w)
@@ -1717,6 +1817,29 @@ _simd4f_neg (const graphene_simd4f_t s)
   return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask));
 }
 
+# define graphene_simd4f_ceil(s) _simd4f_ceil(s)
+# define graphene_simd4f_floor(s) _simd4f_floor(s)
+
+static inline graphene_simd4f_t
+_simd4f_ceil (const graphene_simd4f_t s)
+{
+  const float __ceil_x = ceilf (graphene_simd4f_get_x (s));
+  const float __ceil_y = ceilf (graphene_simd4f_get_y (s));
+  const float __ceil_z = ceilf (graphene_simd4f_get_z (s));
+  const float __ceil_w = ceilf (graphene_simd4f_get_w (s));
+  return graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w);
+}
+
+static inline graphene_simd4f_t
+_simd4f_floor (const graphene_simd4f_t s)
+{
+  const float __floor_x = floorf (graphene_simd4f_get_x (s));
+  const float __floor_y = floorf (graphene_simd4f_get_y (s));
+  const float __floor_z = floorf (graphene_simd4f_get_z (s));
+  const float __floor_w = floorf (graphene_simd4f_get_w (s));
+  return graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w);
+}
+
 #else /* ARM NEON intrinsics-not GCC or Visual Studio */
 
 # error "Need GCC-compatible or Visual Studio compiler for ARM NEON extensions."
@@ -1829,6 +1952,10 @@ _simd4f_neg (const graphene_simd4f_t s)
   (graphene_simd4f_cmp_gt ((a), (b)))
 #define graphene_simd4f_neg(s) \
   (graphene_simd4f_neg ((s)))
+#define graphene_simd4f_ceil(s) \
+  (graphene_simd4f_ceil ((s)))
+#define graphene_simd4f_floor(s) \
+  (graphene_simd4f_floor ((s)))
 
 #else
 # error "Unsupported simd4f implementation."
diff --git a/src/graphene-simd4f.c b/src/graphene-simd4f.c
index 816de00..00c545b 100644
--- a/src/graphene-simd4f.c
+++ b/src/graphene-simd4f.c
@@ -1041,6 +1041,38 @@ graphene_simd4f_t
   return graphene_simd4f_neg (s);
 }
 
+/**
+ * graphene_simd4f_ceil:
+ * @s: a #graphene_simd4f_t
+ *
+ * Rounds each component of the vector @s up, towards positive infinity, to the nearest integer value.
+ *
+ * Returns: the rounded up vector
+ *
+ * Since: 1.12
+ */
+graphene_simd4f_t
+(graphene_simd4f_ceil) (const graphene_simd4f_t s)
+{
+  return graphene_simd4f_ceil (s);
+}
+
+/**
+ * graphene_simd4f_floor:
+ * @s: a #graphene_simd4f_t
+ *
+ * Rounds each component of the vector @s down, towards negative infinity, to the nearest integer value.
+ *
+ * Returns: the rounded down vector
+ *
+ * Since: 1.12
+ */
+graphene_simd4f_t
+(graphene_simd4f_floor) (const graphene_simd4f_t s)
+{
+  return graphene_simd4f_floor (s);
+}
+
 #else /* GRAPHENE_USE_SCALAR */
 
 graphene_simd4f_t
@@ -1472,4 +1504,16 @@ graphene_simd4f_t
   return graphene_simd4f_init (-s.x, -s.y, -s.z, -s.w);
 }
 
+graphene_simd4f_t
+(graphene_simd4f_ceil) (const graphene_simd4f_t s)
+{
+  return graphene_simd4f_init (ceilf (s.x), ceilf (s.y), ceilf (s.z), ceilf (s.w));
+}
+
+graphene_simd4f_t
+(graphene_simd4f_floor) (const graphene_simd4f_t s)
+{
+  return graphene_simd4f_init (floorf (s.x), floorf (s.y), floorf (s.z), floorf (s.w));
+}
+
 #endif /* GRAPHENE_USE_SCALAR */
diff --git a/tests/simd.c b/tests/simd.c
index 8c75041..0bafb30 100644
--- a/tests/simd.c
+++ b/tests/simd.c
@@ -320,6 +320,36 @@ simd_operators_reciprocal (void)
                  NULL);
 }
 
+static void
+simd_operators_ceil (void)
+{
+  graphene_simd4f_t a, b, check;
+
+  check = graphene_simd4f_init (2.0f, -2.0f, 3.0f, -4.0f);
+  a = graphene_simd4f_init (1.7f, -2.4f, 3.0f, -4.2f); /* mixed signs plus an already-integral component */
+  b = graphene_simd4f_ceil (a);
+
+  mutest_expect ("ceil() to round up to the nearest integer",
+                 mutest_bool_value (graphene_simd4f_cmp_eq (b, check)),
+                 mutest_to_be_true,
+                 NULL);
+}
+
+static void
+simd_operators_floor (void)
+{
+  graphene_simd4f_t a, b, check;
+
+  check = graphene_simd4f_init (1.0f, -3.0f, 3.0f, -5.0f);
+  a = graphene_simd4f_init (1.7f, -2.4f, 3.0f, -4.2f); /* negative components tell floor apart from truncation */
+  b = graphene_simd4f_floor (a);
+
+  mutest_expect ("floor() to round down to the nearest integer",
+                 mutest_bool_value (graphene_simd4f_cmp_eq (b, check)),
+                 mutest_to_be_true,
+                 NULL);
+}
+
 static void
 simd_suite (void)
 {
@@ -339,6 +369,9 @@ simd_suite (void)
   mutest_it ("can compute the maximum vector and scalar", simd_operators_max);
 
   mutest_it ("can compute the reciprocal of vector", simd_operators_reciprocal);
+
+  mutest_it ("can round up vector components", simd_operators_ceil);
+  mutest_it ("can round down vector components", simd_operators_floor);
 }
 
 MUTEST_MAIN (