Skip to content

Commit

Permalink
build: add armv7 support
Browse files Browse the repository at this point in the history
  • Loading branch information
ebraraktas committed Aug 12, 2024
1 parent 04a64be commit 953cd06
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 12 deletions.
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,10 @@ macro(ct2_compile_kernels_for_isa isa flag)
list(APPEND SOURCES ${CMAKE_CURRENT_BINARY_DIR}/kernels_${isa}.cc)
endmacro()

if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)"
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)|(armv7-a)"
OR (APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64"))
add_definitions(-DCT2_ARM64_BUILD)
set(CT2_BUILD_ARCH "arm64")
add_definitions(-DCT2_ARM_BUILD)
set(CT2_BUILD_ARCH "arm")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(amd64)|(AMD64)")
add_definitions(-DCT2_X86_BUILD)
set(CT2_BUILD_ARCH "x86_64")
Expand Down Expand Up @@ -261,7 +261,7 @@ if(ENABLE_CPU_DISPATCH)
ct2_compile_kernels_for_isa(avx2 "-mavx2 -mfma")
ct2_compile_kernels_for_isa(avx512 "-mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq")
endif()
elseif(CT2_BUILD_ARCH STREQUAL "arm64")
elseif(CT2_BUILD_ARCH STREQUAL "arm")
ct2_compile_kernels_for_isa(neon "-DUSE_NEON")
endif()
endif()
Expand Down
2 changes: 1 addition & 1 deletion src/cpu/cpu_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace ctranslate2 {
}
}

#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)

namespace ctranslate2 {
namespace cpu {
Expand Down
2 changes: 1 addition & 1 deletion src/cpu/cpu_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace ctranslate2 {
bool cpu_supports_avx();
bool cpu_supports_avx2();
bool cpu_supports_avx512();
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
bool cpu_supports_neon();
#endif

Expand Down
6 changes: 3 additions & 3 deletions src/cpu/cpu_isa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace ctranslate2 {
return "AVX2";
case CpuIsa::AVX512:
return "AVX512";
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
case CpuIsa::NEON:
return "NEON";
#endif
Expand All @@ -51,7 +51,7 @@ namespace ctranslate2 {
return try_isa(env_isa, CpuIsa::AVX2, cpu_supports_avx2());
if (env_isa == "AVX")
return try_isa(env_isa, CpuIsa::AVX, cpu_supports_avx());
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
if (env_isa == "NEON")
return try_isa(env_isa, CpuIsa::NEON, cpu_supports_neon());
#endif
Expand All @@ -68,7 +68,7 @@ namespace ctranslate2 {
return CpuIsa::AVX2;
if (cpu_supports_avx())
return CpuIsa::AVX;
# elif defined(CT2_ARM64_BUILD)
# elif defined(CT2_ARM_BUILD)
if (cpu_supports_neon())
return CpuIsa::NEON;
# endif
Expand Down
4 changes: 2 additions & 2 deletions src/cpu/cpu_isa.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace ctranslate2 {
AVX,
AVX2,
AVX512,
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
NEON,
#endif
};
Expand Down Expand Up @@ -48,7 +48,7 @@ namespace ctranslate2 {
CPU_ISA_CASE(cpu::CpuIsa::AVX, SINGLE_ARG(STMTS)) \
CPU_ISA_DEFAULT(cpu::CpuIsa::GENERIC, SINGLE_ARG(STMTS)) \
}
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
# define CPU_ISA_DISPATCH(STMTS) \
switch (cpu::get_cpu_isa()) { \
CPU_ISA_CASE(cpu::CpuIsa::NEON, SINGLE_ARG(STMTS)) \
Expand Down
16 changes: 16 additions & 0 deletions src/cpu/vec_neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,35 @@ namespace ctranslate2 {
}

// Lane-wise quotient a / b.
static inline value_type div(value_type a, value_type b) {
#if defined(__aarch64__)
  // The dedicated vector-divide intrinsic is only used on AArch64 builds.
  const value_type quotient = vdivq_f32(a, b);
#else
  // Other ARM targets use the operator form on the vector type
  // (relies on the compiler's vector-extension support).
  const value_type quotient = a / b;
#endif
  return quotient;
}

// Lane-wise multiply-add: returns a * b + c.
//
// The fused intrinsic is used whenever the target advertises FMA support
// (always on AArch64; on 32-bit ARM only when built for a VFPv4-capable NEON
// unit, signalled by __ARM_FEATURE_FMA). Otherwise the NEON multiply-accumulate
// intrinsic is used, so the result no longer depends on whether the compiler
// chooses to contract a plain `a * b + c` expression.
static inline value_type mul_add(value_type a, value_type b, value_type c) {
#if defined(__aarch64__) || defined(__ARM_FEATURE_FMA)
  return vfmaq_f32(c, a, b);
#else
  // Unfused: the product is rounded before the accumulate.
  return vmlaq_f32(c, a, b);
#endif
}

// Horizontal sum of the four float lanes of `a`.
//
// The 32-bit ARM path uses pairwise adds so the lanes are combined as
// (a0 + a1) + (a2 + a3) with documented NEON intrinsics, instead of a
// left-to-right scalar sum through compiler-specific vector subscripting.
// NOTE(review): this is intended to match the pairwise order of the AArch64
// across-vector add; confirm if bit-exact parity between targets matters.
static inline float reduce_add(value_type a) {
#ifdef __aarch64__
  return vaddvq_f32(a);
#else
  // pair_sums = [a0 + a1, a2 + a3]
  const float32x2_t pair_sums = vpadd_f32(vget_low_f32(a), vget_high_f32(a));
  // Lane 0 of the second pairwise add holds the total.
  return vget_lane_f32(vpadd_f32(pair_sums, pair_sums), 0);
#endif
}

// Horizontal maximum of the four float lanes of `a`.
//
// The 32-bit ARM path uses pairwise NEON max intrinsics rather than
// std::max over subscripted lanes: std::max's result depends on lane
// position when a NaN is present, and it needs <algorithm> plus
// compiler-specific vector subscripting.
static inline float reduce_max(value_type a) {
#ifdef __aarch64__
  return vmaxvq_f32(a);
#else
  // pair_max = [max(a0, a1), max(a2, a3)]
  const float32x2_t pair_max = vpmax_f32(vget_low_f32(a), vget_high_f32(a));
  // Lane 0 of the second pairwise max holds the overall maximum.
  return vget_lane_f32(vpmax_f32(pair_max, pair_max), 0);
#endif
}

static inline value_type round(value_type v) {
Expand Down
2 changes: 1 addition & 1 deletion src/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace ctranslate2 {
cpu::cpu_supports_avx(),
cpu::cpu_supports_avx2(),
cpu::cpu_supports_avx512());
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
spdlog::info("CPU: {} (NEON={})",
cpu::cpu_vendor(),
cpu::cpu_supports_neon());
Expand Down

0 comments on commit 953cd06

Please sign in to comment.