Skip to content

Commit

Permalink
arm neon: fix arm64 gcc11 build excess elements in vector failure
Browse files Browse the repository at this point in the history
Resolved a build failure on ARM64 with GCC 11 caused by excess
elements in a vector initializer. The issue stemmed from the
`__builtin_shuffle` function, where the number of elements in
the argument vector(s) and the mask vector must match.

For more details, refer to issue #1211.

Signed-off-by: LI Qingwu <[email protected]>
  • Loading branch information
Qingwu-Li authored and mr-c committed Sep 11, 2024
1 parent 9aeb89e commit 84ebde4
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 40 deletions.
4 changes: 2 additions & 2 deletions simde/arm/neon/cadd_rot270.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, simde_float16x4_t
r_.sv64 = __riscv_vfadd_vv_f16m1(op1, a_.sv64, 4);
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 5, 0, 7, 2);
r_.values = b_.values + a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -90,7 +90,7 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_t
r_.sv128 = __riscv_vfadd_vv_f16m1(op1, a_.sv128, 8);
#elif defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6);
r_.values = b_.values + a_.values;
#else
SIMDE_VECTORIZE
Expand Down
4 changes: 2 additions & 2 deletions simde/arm/neon/cadd_rot90.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, simde_float16x4_t b
r_.sv64 = __riscv_vfadd_vv_f16m1(op1, a_.sv64, 4);
#elif defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 4, 3, 6);
r_.values = b_.values + a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -90,7 +90,7 @@ simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, simde_float16x8_t
r_.sv128 = __riscv_vfadd_vv_f16m1(op1, a_.sv128, 8);
#elif defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14);
r_.values = b_.values + a_.values;
#else
SIMDE_VECTORIZE
Expand Down
12 changes: 6 additions & 6 deletions simde/arm/neon/cmla_lane.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a,
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -142,7 +142,7 @@ simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, simde_float16x4_t a
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -228,8 +228,8 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, simde_float16x8_t a
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 0, 0, 2, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 0, 0, 2, 2);
r_low.values += b_.values * a_low.values;
r_high.values += b_.values * a_high.values;
#else
Expand Down Expand Up @@ -320,9 +320,9 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, simde_float16x8_t
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 0, 0, 2, 2);
r_low.values += b_.values * a_low.values;
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 0, 0, 2, 2);
r_high.values += b_.values * a_high.values;
#else
SIMDE_VECTORIZE
Expand Down
20 changes: 10 additions & 10 deletions simde/arm/neon/cmla_rot180_lane.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -151,9 +151,9 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float1
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 0, 0, 2, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3);
r_low.values += b_.values * a_low.values;
r_high.values += b_.values * a_high.values;
#else
Expand Down Expand Up @@ -243,8 +243,8 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float1
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -332,9 +332,9 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 0, 0, 2, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3);
r_low.values += b_.values * a_low.values;
r_high.values += b_.values * a_high.values;
#else
Expand Down
20 changes: 10 additions & 10 deletions simde/arm/neon/cmla_rot270_lane.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -151,9 +151,9 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float1
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3);
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 1, 1, 3, 3);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2);
r_low.values += b_.values * a_low.values;
r_high.values += b_.values * a_high.values;
#else
Expand Down Expand Up @@ -243,8 +243,8 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float1
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -332,9 +332,9 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3);
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 1, 1, 3, 3);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2);
r_high.values += b_.values * a_high.values;
r_low.values += b_.values * a_low.values;
#else
Expand Down
20 changes: 10 additions & 10 deletions simde/arm/neon/cmla_rot90_lane.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -147,8 +147,8 @@ simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6);
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
Expand Down Expand Up @@ -237,9 +237,9 @@ simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3);
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 1, 1, 3, 3);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
r_low.values += b_.values * a_low.values;
r_high.values += b_.values * a_high.values;
#else
Expand Down Expand Up @@ -333,9 +333,9 @@ simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float1
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])));
#if defined(SIMDE_SHUFFLE_VECTOR_) && \
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16))
a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3);
a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6);
a_low.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_low.values, a_low.values, 1, 1, 3, 3);
a_high.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_high.values, a_high.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
r_low.values += b_.values * a_low.values;
r_high.values += b_.values * a_high.values;
#else
Expand Down

0 comments on commit 84ebde4

Please sign in to comment.