From 78f7c1e7b910b3cd096dfc596c4ea20c06c94d87 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Tue, 22 Oct 2024 01:07:00 +0200 Subject: [PATCH] Fix compilation of NEON code [ci skip] --- src/pyfastani/_sequtils/neon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pyfastani/_sequtils/neon.c b/src/pyfastani/_sequtils/neon.c index 9c5112f..61eba65 100644 --- a/src/pyfastani/_sequtils/neon.c +++ b/src/pyfastani/_sequtils/neon.c @@ -1,5 +1,6 @@ #include -#include +#include +#include extern void neon_copy_upper(char* dst, const char* src, size_t len) { const int8x16_t ascii_a = vdupq_n_s8('a' - 1); @@ -7,9 +8,9 @@ extern void neon_copy_upper(char* dst, const char* src, size_t len) { const int8x16_t offset = vdupq_n_s8('a' - 'A'); while (len >= sizeof(int8x16_t)) { - int8x16_t inp = vld1q_u8((int8_t*) src); - int8x16_t greater_than_a = vcgtq_s8(inp, ascii_a); - int8x16_t less_equal_z = vcgtq_s8(ascii_z, inp); + int8x16_t inp = vld1q_s8((int8_t*) src); + int8x16_t greater_than_a = vreinterpretq_s8_u8(vcgtq_s8(inp, ascii_a)); + int8x16_t less_equal_z = vreinterpretq_s8_u8(vcgtq_s8(ascii_z, inp)); int8x16_t mask = vandq_s8(greater_than_a, less_equal_z); int8x16_t diff = vandq_s8(mask, offset); int8x16_t added = vsubq_s8(inp, diff);