Skip to content

Commit

Permalink
Fix compilation of NEON code [ci skip]
Browse files (browse the repository at this point in the history)
  • Loading branch information
althonos committed Oct 21, 2024
1 parent eec8d98 commit 78f7c1e
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions src/pyfastani/_sequtils/neon.c
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,16 @@
#include <ctype.h>
-#include <x86intrin.h>
+#include <stddef.h>
+#include <arm_neon.h>

extern void neon_copy_upper(char* dst, const char* src, size_t len) {
const int8x16_t ascii_a = vdupq_n_s8('a' - 1);
const int8x16_t ascii_z = vdupq_n_s8('z');
const int8x16_t offset = vdupq_n_s8('a' - 'A');

while (len >= sizeof(int8x16_t)) {
-int8x16_t inp = vld1q_u8((int8_t*) src);
-int8x16_t greater_than_a = vcgtq_s8(inp, ascii_a);
-int8x16_t less_equal_z = vcgtq_s8(ascii_z, inp);
+int8x16_t inp = vld1q_s8((int8_t*) src);
+int8x16_t greater_than_a = vreinterpretq_s8_u8(vcgtq_s8(inp, ascii_a));
+int8x16_t less_equal_z = vreinterpretq_s8_u8(vcgtq_s8(ascii_z, inp));
int8x16_t mask = vandq_s8(greater_than_a, less_equal_z);
int8x16_t diff = vandq_s8(mask, offset);
int8x16_t added = vsubq_s8(inp, diff);
Expand Down

0 comments on commit 78f7c1e

Please sign in to comment.