Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ARM] [SDPA] SVE implementation of MHASingleToken for FP32 #27273

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions cmake/developer_package/compile_flags/os_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

include(ProcessorCount)
include(CheckCXXCompilerFlag)
include(CheckCXXSourceCompiles)

#
# ov_disable_deprecated_warnings()
Expand Down Expand Up @@ -91,6 +92,50 @@ macro(ov_dev_package_no_errors)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ov_c_cxx_dev_no_errors}")
endmacro()

#
# ov_check_compiler_supports_sve(flags)
#
# Checks whether CXX compiler for passed language supports SVE code compilation
#
macro(ov_check_compiler_supports_sve flags)
# Code to compile
set(SVE_CODE "
#include <arm_sve.h>
int main() {
svfloat64_t a;
a = svdup_n_f64(0);
return 0;
}")

# Save the current state of required flags
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})

# Set the flags necessary for compiling the test code with SVE support
set(CMAKE_REQUIRED_FLAGS "${CMAKE_CXX_FLAGS_INIT} ${flags}")

# Check if the source code compiles with the given flags for C++
CHECK_CXX_SOURCE_COMPILES("${SVE_CODE}" CXX_HAS_SVE)

# If the compilation test is successful, set appropriate variables indicating support
if(CXX_HAS_SVE)
set(CXX_SVE_FOUND TRUE CACHE BOOL "SVE available on host")
set(CXX_SVE_FOUND TRUE CACHE BOOL "CXX SVE support")
set(CXX_SVE_FLAGS "${flags}" CACHE STRING "CXX SVE flags")
endif()

# Restore the original state of required flags
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

# If the compilation test fails, indicate that the support is not found
if(NOT CXX_SVE_FOUND)
set(CXX_SVE_FOUND FALSE CACHE BOOL "CXX SVE support")
set(CXX_SVE_FLAGS "" CACHE STRING "CXX SVE flags")
endif()

# Mark the variables as advanced to hide them in the default CMake GUI
mark_as_advanced(CXX_SVE_FOUND CXX_SVE_FLAGS)
endmacro()

#
# ov_sse42_optimization_flags(<output flags>)
#
Expand Down Expand Up @@ -208,6 +253,49 @@ macro(ov_arm_neon_fp16_optimization_flags flags)
endif()
endmacro()

#
# ov_arm_sve_optimization_flags(<output flags>)
#
macro(ov_arm_sve_optimization_flags flags)
# Check for compiler SVE support
ov_check_compiler_supports_sve("-march=armv8-a+sve")

if(OV_COMPILER_IS_INTEL_LLVM)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# nothing should be required here
elseif(ANDROID)
if(ANDROID_ABI STREQUAL "arm64-v8a")
set(${flags} -Wno-unused-command-line-argument)
if(CXX_SVE_FOUND)
list(APPEND ${flags} -march=armv8-a+sve)
else()
message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}")
endif()
else()
message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}")
endif()
else()
if(AARCH64)
set(${flags} -O2)

# Add flag for SVE if supported
if(CXX_SVE_FOUND)
list(APPEND ${flags} -march=armv8-a+sve)
endif()
if(NOT CMAKE_CL_64)
list(APPEND ${flags} -ftree-vectorize)
endif()

set(${flags} ${${flags}})
elseif(ARM)
message(WARNING "SVE is not supported on 32-bit ARM architectures.")
else()
message(WARNING "SVE is not supported by architecture ${CMAKE_SYSTEM_PROCESSOR}")
endif()
endif()
endmacro()

#
# ov_disable_all_warnings(<target1 [target2 target3 ...]>)
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(_CPU_CHECK_ANY "true")
set(_CPU_CHECK_SSE42 "with_cpu_x86_sse42()")
set(_CPU_CHECK_AVX "with_cpu_x86_avx()")
set(_CPU_CHECK_NEON_FP16 "with_cpu_neon_fp16()")
set(_CPU_CHECK_SVE "with_cpu_sve()")
set(_CPU_CHECK_AVX2 "with_cpu_x86_avx2()")
set(_CPU_CHECK_AVX512F "with_cpu_x86_avx512f()")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
#

## list of available instruction sets
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16)
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16 SVE)

set(_ACCEPTED_ARCHS_ANY "^(ANY)$")
set(_ACCEPTED_ARCHS_SSE42 "^(ANY|SSE42)$")
set(_ACCEPTED_ARCHS_AVX "^(ANY|SSE42|AVX)$")
set(_ACCEPTED_ARCHS_AVX2 "^(ANY|SSE42|AVX|AVX2)$")
set(_ACCEPTED_ARCHS_AVX512F "^(ANY|SSE42|AVX|AVX2|AVX512F)$")
set(_ACCEPTED_ARCHS_NEON_FP16 "^(ANY|NEON_FP16)$")
set(_ACCEPTED_ARCHS_SVE "^(ANY|SVE)$")

## Arch specific definitions
set(_DEFINE_ANY "")
Expand All @@ -19,12 +20,14 @@ set(_DEFINE_AVX "HAVE_AVX" ${_DEFINE_SSE42})
set(_DEFINE_AVX2 "HAVE_AVX2" ${_DEFINE_AVX})
set(_DEFINE_AVX512F "HAVE_AVX512F" ${_DEFINE_AVX2})
set(_DEFINE_NEON_FP16 "HAVE_NEON_FP16" ${_DEFINE_ANY})
set(_DEFINE_SVE "HAVE_SVE" ${_DEFINE_SVE})

## Arch specific compile options
ov_avx512_optimization_flags(_FLAGS_AVX512F)
ov_avx2_optimization_flags (_FLAGS_AVX2)
ov_sse42_optimization_flags (_FLAGS_SSE42)
ov_arm_neon_fp16_optimization_flags(_FLAGS_NEON_FP16)
ov_arm_sve_optimization_flags(_FLAGS_SVE)
set(_FLAGS_AVX "") ## TBD is not defined for OV project yet
set(_FLAGS_ANY "") ##

Expand Down Expand Up @@ -185,6 +188,8 @@ endfunction()
function(_currently_requested_top_arch VAR)
if(ENABLE_NEON_FP16)
set(RES NEON_FP16)
elseif(ENABLE_SVE)
set(RES SVE)
elseif(ENABLE_AVX512F)
set(RES AVX512F)
elseif(ENABLE_AVX2)
Expand Down
2 changes: 2 additions & 0 deletions cmake/developer_package/features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ ov_dependent_option (ENABLE_AVX512F "Enable AVX512 optimizations" ON "X86_64 OR

ov_dependent_option(ENABLE_NEON_FP16 "Enable ARM FP16 optimizations" ON "AARCH64" OFF)

ov_dependent_option(ENABLE_SVE "Enable SVE optimizations" ON "AARCH64" OFF)

# Type of build, we add this as an explicit option to default it to ON
get_property(BUILD_SHARED_LIBS_DEFAULT GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
ov_option (BUILD_SHARED_LIBS "Build as a shared library" ${BUILD_SHARED_LIBS_DEFAULT})
Expand Down
7 changes: 7 additions & 0 deletions src/inference/dev_api/openvino/runtime/system_conf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_sse42();
*/
OPENVINO_RUNTIME_API bool with_cpu_neon_fp16();

/**
* @brief Checks whether CPU supports ARM SVE capability
* @ingroup ov_dev_api_system_conf
* @return `True` if ARM SVE instructions are available, `false` otherwise
*/
OPENVINO_RUNTIME_API bool with_cpu_sve();

/**
* @brief Checks whether CPU supports AVX capability
* @ingroup ov_dev_api_system_conf
Expand Down
19 changes: 19 additions & 0 deletions src/inference/src/system_conf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# include <sys/auxv.h>
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 24)
#elif defined(__APPLE__) && defined(__aarch64__)
# include <sys/sysctl.h>
# include <sys/types.h>
Expand Down Expand Up @@ -114,6 +115,10 @@ bool with_cpu_neon_fp16() {
return false;
}

bool with_cpu_sve() {
return false;
}

#else // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64

bool with_cpu_x86_sse42() {
Expand Down Expand Up @@ -173,6 +178,20 @@ bool with_cpu_neon_fp16() {
return false;
# endif
}
bool with_cpu_sve() {
# if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__arm__) && defined(__aarch64__)
const uint32_t hwcaps = getauxval(AT_HWCAP);
return hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE;
# elif !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__aarch64__) && defined(__arm__)
return false;
# elif defined(__aarch64__) && defined(__APPLE__)
return false;
# else
return false;
# endif
}
#endif // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64

bool check_open_mp_env_vars(bool include_omp_num_threads) {
Expand Down
22 changes: 20 additions & 2 deletions src/plugins/intel_cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,24 @@ target_include_directories(${TARGET_NAME} SYSTEM PRIVATE $<TARGET_PROPERTY:dnnl,
# is not (yet) needed.
target_include_directories(${TARGET_NAME} PRIVATE $<TARGET_PROPERTY:openvino::reference,INTERFACE_INCLUDE_DIRECTORIES>)

# ARCH lists for softmax.cpp and mha_single_token.cpp
# Based on result of above calls, decide whether to add SVE
set(SOFTMAX_ARCH_LIST AVX512F AVX2)
set(MHA_SINGLE_TOKEN_ARCH_LIST AVX512F AVX2)

if(ENABLE_NEON_FP16)
list(APPEND SOFTMAX_ARCH_LIST NEON_FP16)
list(APPEND MHA_SINGLE_TOKEN_ARCH_LIST NEON_FP16)
endif()

if(ENABLE_SVE)
list(APPEND SOFTMAX_ARCH_LIST SVE)
list(APPEND MHA_SINGLE_TOKEN_ARCH_LIST SVE)
endif()

list(APPEND SOFTMAX_ARCH_LIST ANY)
list(APPEND MHA_SINGLE_TOKEN_ARCH_LIST ANY)

# Cross compiled function
# TODO: The same for proposal, proposalONNX, topk
cross_compiled_file(${TARGET_NAME}
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -288,14 +306,14 @@ cross_compiled_file(${TARGET_NAME}
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 NEON_FP16 ANY
ARCH ${SOFTMAX_ARCH_LIST}
dmitry-gorokhov marked this conversation as resolved.
Show resolved Hide resolved
src/nodes/kernels/scaled_attn/softmax.cpp
API src/nodes/kernels/scaled_attn/softmax.hpp
NAME attn_softmax
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 NEON_FP16 ANY
ARCH ${MHA_SINGLE_TOKEN_ARCH_LIST}
src/nodes/kernels/scaled_attn/mha_single_token.cpp
API src/nodes/kernels/scaled_attn/mha_single_token.hpp
NAME mha_single_token
Expand Down
Loading
Loading