diff --git a/3rdparty/cpuinfo/CMakeLists.txt b/3rdparty/cpuinfo/CMakeLists.txt index e965fe9bb355c..914273bc56ade 100644 --- a/3rdparty/cpuinfo/CMakeLists.txt +++ b/3rdparty/cpuinfo/CMakeLists.txt @@ -3,7 +3,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR) # ---[ Setup project PROJECT( cpuinfo - LANGUAGES C CXX + LANGUAGES C ) # ---[ Options. @@ -13,6 +13,26 @@ SET(CPUINFO_RUNTIME_TYPE "default" CACHE STRING "Type of runtime library (shared SET_PROPERTY(CACHE CPUINFO_RUNTIME_TYPE PROPERTY STRINGS default static shared) SET(CPUINFO_LOG_LEVEL "default" CACHE STRING "Minimum logging level (info with lower severity will be ignored)") SET_PROPERTY(CACHE CPUINFO_LOG_LEVEL PROPERTY STRINGS default debug info warning error fatal none) +IF(ANDROID) + OPTION(CPUINFO_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" OFF) +ELSE() + OPTION(CPUINFO_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" ON) +ENDIF() +OPTION(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF) +OPTION(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF) +OPTION(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF) +OPTION(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF) +OPTION(CPUINFO_BUILD_PKG_CONFIG "Build pkg-config manifest" OFF) +OPTION(USE_SYSTEM_LIBS "Use system libraries instead of downloading and building them" OFF) +OPTION(USE_SYSTEM_GOOGLEBENCHMARK "Use system Google Benchmark library instead of downloading and building it" ${USE_SYSTEM_LIBS}) +OPTION(USE_SYSTEM_GOOGLETEST "Use system Google Test library instead of downloading and building it" ${USE_SYSTEM_LIBS}) + +# ---[ CMake options +INCLUDE(GNUInstallDirs) + +IF(CPUINFO_BUILD_UNIT_TESTS OR CPUINFO_BUILD_MOCK_TESTS) + ENABLE_TESTING() +ENDIF() MACRO(CPUINFO_TARGET_ENABLE_C99 target) SET_TARGET_PROPERTIES(${target} PROPERTIES @@ -22,7 +42,7 @@ ENDMACRO() MACRO(CPUINFO_TARGET_ENABLE_CXX11 target) SET_TARGET_PROPERTIES(${target} PROPERTIES - CXX_STANDARD 11 + CXX_STANDARD 14 
CXX_EXTENSIONS NO) ENDMACRO() @@ -38,10 +58,29 @@ MACRO(CPUINFO_TARGET_RUNTIME_LIBRARY target) ENDIF() ENDMACRO() +# -- [ Determine whether building for Apple's desktop or mobile OSes +IF(CMAKE_SYSTEM_NAME MATCHES "^(Darwin|iOS|tvOS|watchOS)$") + SET(IS_APPLE_OS TRUE) +ELSE() + SET(IS_APPLE_OS FALSE) +ENDIF() + # -- [ Determine target processor SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}") -IF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$") +IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$") SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") +ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_VS_PLATFORM_NAME) + IF(CMAKE_VS_PLATFORM_NAME STREQUAL "Win32") + SET(CPUINFO_TARGET_PROCESSOR "x86") + ELSEIF(CMAKE_VS_PLATFORM_NAME STREQUAL "x64") + SET(CPUINFO_TARGET_PROCESSOR "x86_64") + ELSEIF(CMAKE_VS_PLATFORM_NAME STREQUAL "ARM64") + SET(CPUINFO_TARGET_PROCESSOR "arm64") + ELSEIF(CMAKE_VS_PLATFORM_NAME MATCHES "^(ARM64EC|arm64ec|ARM64E|arm64e)") + SET(CPUINFO_TARGET_PROCESSOR "arm64") + ELSE() + MESSAGE(FATAL_ERROR "Unsupported Visual Studio architecture \"${CMAKE_VS_PLATFORM_NAME}\"") + ENDIF() ENDIF() # ---[ Build flags @@ -53,20 +92,21 @@ IF(NOT CMAKE_SYSTEM_PROCESSOR) "cpuinfo will compile, but cpuinfo_initialize() will always fail.") SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ENDIF() -ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$") +ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64.*|ARM64.*|riscv(32|64))$") MESSAGE(WARNING "Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. " "cpuinfo will compile, but cpuinfo_initialize() will always fail.") SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ENDIF() + IF(NOT CMAKE_SYSTEM_NAME) MESSAGE(WARNING "Target operating system is not specified. 
" "cpuinfo will compile, but cpuinfo_initialize() will always fail.") SET(CPUINFO_SUPPORTED_PLATFORM FALSE) -ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$") - IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") +ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS|Darwin|Linux|Android)$") + IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT IS_APPLE_OS) MESSAGE(WARNING "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. " "cpuinfo will compile, but cpuinfo_initialize() will always fail.") @@ -74,11 +114,48 @@ ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android ENDIF() ENDIF() +IF(CPUINFO_SUPPORTED_PLATFORM) + IF(CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS OR CPUINFO_BUILD_BENCHMARKS) + ENABLE_LANGUAGE(CXX) + ENDIF() +ENDIF() + +# ---[ Download deps +SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps + CACHE PATH "Confu-style dependencies source directory") +SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps + CACHE PATH "Confu-style dependencies binary directory") + +IF(CPUINFO_SUPPORTED_PLATFORM AND (CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS)) + IF(USE_SYSTEM_GOOGLETEST) + FIND_PACKAGE(GTest REQUIRED) + ELSEIF(NOT DEFINED GOOGLETEST_SOURCE_DIR) + MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)") + CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt") + EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . + WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") + EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build . 
+ WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") + SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory") + ENDIF() +ENDIF() + +IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS) + IF(USE_SYSTEM_GOOGLEBENCHMARK) + FIND_PACKAGE(benchmark REQUIRED) + ELSEIF(NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR) + MESSAGE(STATUS "Downloading Google Benchmark to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)") + CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt") + EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . + WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download") + EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build . + WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download") + SET(GOOGLEBENCHMARK_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" CACHE STRING "Google Benchmark source directory") + ENDIF() +ENDIF() + # ---[ cpuinfo library -SET(CPUINFO_SRCS - src/init.c - src/api.c - src/cache.c) +SET(CPUINFO_SRCS src/api.c src/cache.c src/init.c src/log.c) IF(CPUINFO_SUPPORTED_PLATFORM) IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")) @@ -97,12 +174,16 @@ IF(CPUINFO_SUPPORTED_PLATFORM) LIST(APPEND CPUINFO_SRCS src/x86/linux/init.c src/x86/linux/cpuinfo.c) - ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") + ELSEIF(IS_APPLE_OS) LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c) - ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$") + ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS)$") LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c) ENDIF() - ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64|arm64)$" OR IOS_ARCH 
MATCHES "^(armv7.*|arm64.*)$") + ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^Windows" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(ARM64|arm64)$") + LIST(APPEND CPUINFO_SRCS + src/arm/windows/init-by-logical-sys-info.c + src/arm/windows/init.c) + ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64|arm64.*)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$") LIST(APPEND CPUINFO_SRCS src/arm/uarch.c src/arm/cache.c) @@ -122,13 +203,22 @@ IF(CPUINFO_SUPPORTED_PLATFORM) ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$") LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c) ENDIF() - ELSEIF(IOS OR (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CPUINFO_TARGET_PROCESSOR STREQUAL "arm64")) + ELSEIF(IS_APPLE_OS AND CPUINFO_TARGET_PROCESSOR MATCHES "arm64.*") LIST(APPEND CPUINFO_SRCS src/arm/mach/init.c) ENDIF() IF(CMAKE_SYSTEM_NAME STREQUAL "Android") LIST(APPEND CPUINFO_SRCS src/arm/android/properties.c) ENDIF() + ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(riscv(32|64))$") + LIST(APPEND CPUINFO_SRCS + src/riscv/uarch.c) + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + LIST(APPEND CPUINFO_SRCS + src/riscv/linux/init.c + src/riscv/linux/riscv-hw.c + src/riscv/linux/riscv-isa.c) + ENDIF() ENDIF() IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") @@ -142,7 +232,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM) src/linux/multiline.c src/linux/cpulist.c src/linux/processors.c) - ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") + ELSEIF(IS_APPLE_OS) LIST(APPEND CPUINFO_SRCS src/mach/topology.c) ENDIF() @@ -166,34 +256,43 @@ ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS}) CPUINFO_TARGET_ENABLE_C99(cpuinfo) CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals) CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo) -IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$") +IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|WindowsStore|CYGWIN|MSYS)$") # Target Windows 7+ API - TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601) - TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601) 
+ TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601 _CRT_SECURE_NO_WARNINGS) + TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601 _CRT_SECURE_NO_WARNINGS) + # Explicitly link Kernel32 for UWP build + if(CMAKE_SYSTEM_NAME STREQUAL "WindowsStore") + TARGET_LINK_LIBRARIES(cpuinfo PUBLIC Kernel32) + endif() +ENDIF() +IF(ANDROID AND NOT CPUINFO_LOG_TO_STDIO) + TARGET_LINK_LIBRARIES(cpuinfo PRIVATE "log") ENDIF() SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h) -TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include) +TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src) TARGET_INCLUDE_DIRECTORIES(cpuinfo_internals BEFORE PUBLIC include src) +TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_TO_STDIO=$<BOOL:${CPUINFO_LOG_TO_STDIO}>") IF(CPUINFO_LOG_LEVEL STREQUAL "default") # default logging level: error (subject to change) - TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=2") ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "debug") - TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=5) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=5") ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "info") - TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=4) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=4") ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "warning") - TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=3) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=3") ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "error") - TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=2") ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "fatal") - TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=1) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=1") ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "none") - 
TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=0) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE "CPUINFO_LOG_LEVEL=0") ELSE() MESSAGE(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}") ENDIF() -TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE CPUINFO_LOG_LEVEL=0) +TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE "CPUINFO_LOG_LEVEL=0") +TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE "CPUINFO_LOG_TO_STDIO=1") IF(CPUINFO_SUPPORTED_PLATFORM) TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1) @@ -207,17 +306,610 @@ ELSE() TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0) ENDIF() -# ---[ cpuinfo dependencies: clog -IF(NOT DEFINED CLOG_SOURCE_DIR) - SET(CLOG_SOURCE_DIR "${PROJECT_SOURCE_DIR}/deps/clog") +ADD_LIBRARY(${PROJECT_NAME}::cpuinfo ALIAS cpuinfo) + +# support find_package(cpuinfo CONFIG) +INCLUDE(CMakePackageConfigHelpers) +GET_FILENAME_COMPONENT(CONFIG_FILE_PATH ${CMAKE_CURRENT_BINARY_DIR}/cpuinfo-config.cmake ABSOLUTE) +CONFIGURE_PACKAGE_CONFIG_FILE( + cmake/cpuinfo-config.cmake.in ${CONFIG_FILE_PATH} + INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}) +INSTALL(FILES ${CONFIG_FILE_PATH} + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}) # cpuinfo_DIR ${prefix}/share/cpuinfo + +INSTALL(TARGETS cpuinfo + EXPORT cpuinfo-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +INSTALL(EXPORT cpuinfo-targets + NAMESPACE ${PROJECT_NAME}:: # IMPORTED cpuinfo::cpuinfo + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}) + +# ---[ cpuinfo micro-benchmarks +IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS) + # ---[ Build google benchmark + IF(NOT TARGET benchmark AND NOT USE_SYSTEM_GOOGLEBENCHMARK) + SET(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "") + ADD_SUBDIRECTORY( + "${GOOGLEBENCHMARK_SOURCE_DIR}" + 
"${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark") + ENDIF() + + IF(CMAKE_SYSTEM_NAME MATCHES "^(Linux|Android)$") + ADD_EXECUTABLE(get-current-bench bench/get-current.cc) + TARGET_LINK_LIBRARIES(get-current-bench cpuinfo benchmark) + ENDIF() + + ADD_EXECUTABLE(init-bench bench/init.cc) + TARGET_LINK_LIBRARIES(init-bench cpuinfo benchmark) +ENDIF() + +IF(CPUINFO_SUPPORTED_PLATFORM) + IF(CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS) + # ---[ Build google test + IF(NOT TARGET gtest AND NOT USE_SYSTEM_GOOGLETEST) + IF(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "static") + SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + ENDIF() + ADD_SUBDIRECTORY( + "${GOOGLETEST_SOURCE_DIR}" + "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest") + ENDIF() + ENDIF() ENDIF() -IF(NOT TARGET clog) - SET(CLOG_BUILD_TESTS OFF CACHE BOOL "") - SET(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "") - ADD_SUBDIRECTORY( - "${CLOG_SOURCE_DIR}") - # We build static version of clog but a dynamic library may indirectly depend on it - SET_PROPERTY(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON) + +# ---[ cpuinfo mock library and mock tests +IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) + SET(CPUINFO_MOCK_SRCS "${CPUINFO_SRCS}") + IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$") + LIST(APPEND CPUINFO_MOCK_SRCS src/x86/mockcpuid.c) + ENDIF() + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + LIST(APPEND CPUINFO_MOCK_SRCS src/linux/mockfile.c) + ENDIF() + + ADD_LIBRARY(cpuinfo_mock STATIC ${CPUINFO_MOCK_SRCS}) + CPUINFO_TARGET_ENABLE_C99(cpuinfo_mock) + CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo_mock) + SET_TARGET_PROPERTIES(cpuinfo_mock PROPERTIES PUBLIC_HEADER include/cpuinfo.h) + TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PUBLIC include) + TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PRIVATE src) + TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PUBLIC "CPUINFO_MOCK=1") + TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE 
"CPUINFO_LOG_LEVEL=5") + TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE "CPUINFO_LOG_TO_STDIO=1") + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + TARGET_LINK_LIBRARIES(cpuinfo_mock PUBLIC ${CMAKE_THREAD_LIBS_INIT}) + TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE _GNU_SOURCE=1) + ENDIF() + + IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a)$") + ADD_EXECUTABLE(atm7029b-tablet-test test/mock/atm7029b-tablet.cc) + TARGET_INCLUDE_DIRECTORIES(atm7029b-tablet-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(atm7029b-tablet-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME atm7029b-tablet-test COMMAND atm7029b-tablet-test) + + ADD_EXECUTABLE(blu-r1-hd-test test/mock/blu-r1-hd.cc) + TARGET_INCLUDE_DIRECTORIES(blu-r1-hd-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(blu-r1-hd-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME blu-r1-hd-test COMMAND blu-r1-hd-test) + + ADD_EXECUTABLE(galaxy-a3-2016-eu-test test/mock/galaxy-a3-2016-eu.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-a3-2016-eu-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-a3-2016-eu-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-a3-2016-eu-test COMMAND galaxy-a3-2016-eu-test) + + ADD_EXECUTABLE(galaxy-a8-2016-duos-test test/mock/galaxy-a8-2016-duos.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2016-duos-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-a8-2016-duos-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-a8-2016-duos-test COMMAND galaxy-a8-2016-duos-test) + + ADD_EXECUTABLE(galaxy-grand-prime-value-edition-test test/mock/galaxy-grand-prime-value-edition.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-grand-prime-value-edition-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-grand-prime-value-edition-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-grand-prime-value-edition-test COMMAND galaxy-grand-prime-value-edition-test) + + ADD_EXECUTABLE(galaxy-j1-2016-test 
test/mock/galaxy-j1-2016.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-j1-2016-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-j1-2016-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-j1-2016-test COMMAND galaxy-j1-2016-test) + + ADD_EXECUTABLE(galaxy-j5-test test/mock/galaxy-j5.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-j5-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-j5-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-j5-test COMMAND galaxy-j5-test) + + ADD_EXECUTABLE(galaxy-j7-prime-test test/mock/galaxy-j7-prime.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-j7-prime-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-j7-prime-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-j7-prime-test COMMAND galaxy-j7-prime-test) + + ADD_EXECUTABLE(galaxy-j7-tmobile-test test/mock/galaxy-j7-tmobile.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-j7-tmobile-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-j7-tmobile-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-j7-tmobile-test COMMAND galaxy-j7-tmobile-test) + + ADD_EXECUTABLE(galaxy-j7-uae-test test/mock/galaxy-j7-uae.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-j7-uae-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-j7-uae-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-j7-uae-test COMMAND galaxy-j7-uae-test) + + ADD_EXECUTABLE(galaxy-s3-us-test test/mock/galaxy-s3-us.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s3-us-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s3-us-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s3-us-test COMMAND galaxy-s3-us-test) + + ADD_EXECUTABLE(galaxy-s4-us-test test/mock/galaxy-s4-us.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s4-us-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s4-us-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s4-us-test COMMAND galaxy-s4-us-test) + + ADD_EXECUTABLE(galaxy-s5-global-test test/mock/galaxy-s5-global.cc) + 
TARGET_INCLUDE_DIRECTORIES(galaxy-s5-global-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s5-global-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s5-global-test COMMAND galaxy-s5-global-test) + + ADD_EXECUTABLE(galaxy-s5-us-test test/mock/galaxy-s5-us.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s5-us-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s5-us-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s5-us-test COMMAND galaxy-s5-us-test) + + ADD_EXECUTABLE(galaxy-tab-3-7.0-test test/mock/galaxy-tab-3-7.0.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-7.0-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-tab-3-7.0-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-tab-3-7.0-test COMMAND galaxy-tab-3-7.0-test) + + ADD_EXECUTABLE(galaxy-tab-3-lite-test test/mock/galaxy-tab-3-lite.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-lite-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-tab-3-lite-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-tab-3-lite-test COMMAND galaxy-tab-3-lite-test) + + ADD_EXECUTABLE(galaxy-win-duos-test test/mock/galaxy-win-duos.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-win-duos-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-win-duos-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-win-duos-test COMMAND galaxy-win-duos-test) + + ADD_EXECUTABLE(huawei-ascend-p7-test test/mock/huawei-ascend-p7.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-ascend-p7-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-ascend-p7-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-ascend-p7-test COMMAND huawei-ascend-p7-test) + + ADD_EXECUTABLE(huawei-honor-6-test test/mock/huawei-honor-6.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-honor-6-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-honor-6-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-honor-6-test COMMAND huawei-honor-6-test) + + ADD_EXECUTABLE(lenovo-a6600-plus-test 
test/mock/lenovo-a6600-plus.cc) + TARGET_INCLUDE_DIRECTORIES(lenovo-a6600-plus-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(lenovo-a6600-plus-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME lenovo-a6600-plus-test COMMAND lenovo-a6600-plus-test) + + ADD_EXECUTABLE(lenovo-vibe-x2-test test/mock/lenovo-vibe-x2.cc) + TARGET_INCLUDE_DIRECTORIES(lenovo-vibe-x2-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(lenovo-vibe-x2-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME lenovo-vibe-x2-test COMMAND lenovo-vibe-x2-test) + + ADD_EXECUTABLE(lg-k10-eu-test test/mock/lg-k10-eu.cc) + TARGET_INCLUDE_DIRECTORIES(lg-k10-eu-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(lg-k10-eu-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME lg-k10-eu-test COMMAND lg-k10-eu-test) + + ADD_EXECUTABLE(lg-optimus-g-pro-test test/mock/lg-optimus-g-pro.cc) + TARGET_INCLUDE_DIRECTORIES(lg-optimus-g-pro-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(lg-optimus-g-pro-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME lg-optimus-g-pro-test COMMAND lg-optimus-g-pro-test) + + ADD_EXECUTABLE(moto-e-gen1-test test/mock/moto-e-gen1.cc) + TARGET_INCLUDE_DIRECTORIES(moto-e-gen1-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(moto-e-gen1-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME moto-e-gen1-test COMMAND moto-e-gen1-test) + + ADD_EXECUTABLE(moto-g-gen1-test test/mock/moto-g-gen1.cc) + TARGET_INCLUDE_DIRECTORIES(moto-g-gen1-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(moto-g-gen1-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME moto-g-gen1-test COMMAND moto-g-gen1-test) + + ADD_EXECUTABLE(moto-g-gen2-test test/mock/moto-g-gen2.cc) + TARGET_INCLUDE_DIRECTORIES(moto-g-gen2-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(moto-g-gen2-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME moto-g-gen2-test COMMAND moto-g-gen2-test) + + ADD_EXECUTABLE(moto-g-gen3-test test/mock/moto-g-gen3.cc) + TARGET_INCLUDE_DIRECTORIES(moto-g-gen3-test BEFORE PRIVATE test/mock) + 
TARGET_LINK_LIBRARIES(moto-g-gen3-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME moto-g-gen3-test COMMAND moto-g-gen3-test) + + ADD_EXECUTABLE(moto-g-gen4-test test/mock/moto-g-gen4.cc) + TARGET_INCLUDE_DIRECTORIES(moto-g-gen4-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(moto-g-gen4-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME moto-g-gen4-test COMMAND moto-g-gen4-test) + + ADD_EXECUTABLE(moto-g-gen5-test test/mock/moto-g-gen5.cc) + TARGET_INCLUDE_DIRECTORIES(moto-g-gen5-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(moto-g-gen5-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME moto-g-gen5-test COMMAND moto-g-gen5-test) + + ADD_EXECUTABLE(nexus-s-test test/mock/nexus-s.cc) + TARGET_INCLUDE_DIRECTORIES(nexus-s-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(nexus-s-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME nexus-s-test COMMAND nexus-s-test) + + ADD_EXECUTABLE(nexus4-test test/mock/nexus4.cc) + TARGET_INCLUDE_DIRECTORIES(nexus4-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(nexus4-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME nexus4-test COMMAND nexus4-test) + + ADD_EXECUTABLE(nexus6-test test/mock/nexus6.cc) + TARGET_INCLUDE_DIRECTORIES(nexus6-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(nexus6-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME nexus6-test COMMAND nexus6-test) + + ADD_EXECUTABLE(nexus10-test test/mock/nexus10.cc) + TARGET_INCLUDE_DIRECTORIES(nexus10-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(nexus10-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME nexus10-test COMMAND nexus10-test) + + ADD_EXECUTABLE(padcod-10.1-test test/mock/padcod-10.1.cc) + TARGET_INCLUDE_DIRECTORIES(padcod-10.1-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(padcod-10.1-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME padcod-10.1-test COMMAND padcod-10.1-test) + + ADD_EXECUTABLE(xiaomi-redmi-2a-test test/mock/xiaomi-redmi-2a.cc) + TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-2a-test BEFORE PRIVATE test/mock) + 
TARGET_LINK_LIBRARIES(xiaomi-redmi-2a-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME xiaomi-redmi-2a-test COMMAND xiaomi-redmi-2a-test) + + ADD_EXECUTABLE(xperia-sl-test test/mock/xperia-sl.cc) + TARGET_INCLUDE_DIRECTORIES(xperia-sl-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(xperia-sl-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME xperia-sl-test COMMAND xperia-sl-test) + ENDIF() + + IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$") + ADD_EXECUTABLE(alcatel-revvl-test test/mock/alcatel-revvl.cc) + TARGET_INCLUDE_DIRECTORIES(alcatel-revvl-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(alcatel-revvl-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME alcatel-revvl-test COMMAND alcatel-revvl-test) + + ADD_EXECUTABLE(galaxy-a8-2018-test test/mock/galaxy-a8-2018.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2018-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-a8-2018-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-a8-2018-test COMMAND galaxy-a8-2018-test) + + ADD_EXECUTABLE(galaxy-c9-pro-test test/mock/galaxy-c9-pro.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-c9-pro-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-c9-pro-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-c9-pro-test COMMAND galaxy-c9-pro-test) + + ADD_EXECUTABLE(galaxy-s6-test test/mock/galaxy-s6.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s6-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s6-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s6-test COMMAND galaxy-s6-test) + + ADD_EXECUTABLE(galaxy-s7-us-test test/mock/galaxy-s7-us.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s7-us-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s7-us-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s7-us-test COMMAND galaxy-s7-us-test) + + ADD_EXECUTABLE(galaxy-s7-global-test test/mock/galaxy-s7-global.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s7-global-test BEFORE PRIVATE 
test/mock) + TARGET_LINK_LIBRARIES(galaxy-s7-global-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s7-global-test COMMAND galaxy-s7-global-test) + + ADD_EXECUTABLE(galaxy-s8-us-test test/mock/galaxy-s8-us.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s8-us-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s8-us-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s8-us-test COMMAND galaxy-s8-us-test) + + ADD_EXECUTABLE(galaxy-s8-global-test test/mock/galaxy-s8-global.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s8-global-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s8-global-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s8-global-test COMMAND galaxy-s8-global-test) + + ADD_EXECUTABLE(galaxy-s9-us-test test/mock/galaxy-s9-us.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s9-us-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s9-us-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s9-us-test COMMAND galaxy-s9-us-test) + + ADD_EXECUTABLE(galaxy-s9-global-test test/mock/galaxy-s9-global.cc) + TARGET_INCLUDE_DIRECTORIES(galaxy-s9-global-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(galaxy-s9-global-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME galaxy-s9-global-test COMMAND galaxy-s9-global-test) + + ADD_EXECUTABLE(huawei-mate-8-test test/mock/huawei-mate-8.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-mate-8-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-mate-8-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-mate-8-test COMMAND huawei-mate-8-test) + + ADD_EXECUTABLE(huawei-mate-9-test test/mock/huawei-mate-9.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-mate-9-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-mate-9-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-mate-9-test COMMAND huawei-mate-9-test) + + ADD_EXECUTABLE(huawei-mate-10-test test/mock/huawei-mate-10.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-mate-10-test BEFORE PRIVATE test/mock) + 
TARGET_LINK_LIBRARIES(huawei-mate-10-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-mate-10-test COMMAND huawei-mate-10-test) + + ADD_EXECUTABLE(huawei-mate-20-test test/mock/huawei-mate-20.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-mate-20-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-mate-20-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-mate-20-test COMMAND huawei-mate-20-test) + + ADD_EXECUTABLE(huawei-p8-lite-test test/mock/huawei-p8-lite.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-p8-lite-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-p8-lite-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-p8-lite-test COMMAND huawei-p8-lite-test) + + ADD_EXECUTABLE(huawei-p9-lite-test test/mock/huawei-p9-lite.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-p9-lite-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-p9-lite-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-p9-lite-test COMMAND huawei-p9-lite-test) + + ADD_EXECUTABLE(huawei-p20-pro-test test/mock/huawei-p20-pro.cc) + TARGET_INCLUDE_DIRECTORIES(huawei-p20-pro-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(huawei-p20-pro-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME huawei-p20-pro-test COMMAND huawei-p20-pro-test) + + ADD_EXECUTABLE(iconia-one-10-test test/mock/iconia-one-10.cc) + TARGET_INCLUDE_DIRECTORIES(iconia-one-10-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(iconia-one-10-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME iconia-one-10-test COMMAND iconia-one-10-test) + + ADD_EXECUTABLE(meizu-pro-6-test test/mock/meizu-pro-6.cc) + TARGET_INCLUDE_DIRECTORIES(meizu-pro-6-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(meizu-pro-6-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME meizu-pro-6-test COMMAND meizu-pro-6-test) + + ADD_EXECUTABLE(meizu-pro-6s-test test/mock/meizu-pro-6s.cc) + TARGET_INCLUDE_DIRECTORIES(meizu-pro-6s-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(meizu-pro-6s-test PRIVATE cpuinfo_mock gtest) + 
ADD_TEST(NAME meizu-pro-6s-test COMMAND meizu-pro-6s-test) + + ADD_EXECUTABLE(meizu-pro-7-plus-test test/mock/meizu-pro-7-plus.cc) + TARGET_INCLUDE_DIRECTORIES(meizu-pro-7-plus-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(meizu-pro-7-plus-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME meizu-pro-7-plus-test COMMAND meizu-pro-7-plus-test) + + ADD_EXECUTABLE(nexus5x-test test/mock/nexus5x.cc) + TARGET_INCLUDE_DIRECTORIES(nexus5x-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(nexus5x-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME nexus5x-test COMMAND nexus5x-test) + + ADD_EXECUTABLE(nexus6p-test test/mock/nexus6p.cc) + TARGET_INCLUDE_DIRECTORIES(nexus6p-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(nexus6p-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME nexus6p-test COMMAND nexus6p-test) + + ADD_EXECUTABLE(nexus9-test test/mock/nexus9.cc) + TARGET_INCLUDE_DIRECTORIES(nexus9-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(nexus9-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME nexus9-test COMMAND nexus9-test) + + ADD_EXECUTABLE(oneplus-3t-test test/mock/oneplus-3t.cc) + TARGET_INCLUDE_DIRECTORIES(oneplus-3t-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(oneplus-3t-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME oneplus-3t-test COMMAND oneplus-3t-test) + + ADD_EXECUTABLE(oneplus-5-test test/mock/oneplus-5.cc) + TARGET_INCLUDE_DIRECTORIES(oneplus-5-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(oneplus-5-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME oneplus-5-test COMMAND oneplus-5-test) + + ADD_EXECUTABLE(oneplus-5t-test test/mock/oneplus-5t.cc) + TARGET_INCLUDE_DIRECTORIES(oneplus-5t-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(oneplus-5t-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME oneplus-5t-test COMMAND oneplus-5t-test) + + ADD_EXECUTABLE(oppo-a37-test test/mock/oppo-a37.cc) + TARGET_INCLUDE_DIRECTORIES(oppo-a37-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(oppo-a37-test PRIVATE 
cpuinfo_mock gtest) + ADD_TEST(NAME oppo-a37-test COMMAND oppo-a37-test) + + ADD_EXECUTABLE(oppo-r9-test test/mock/oppo-r9.cc) + TARGET_INCLUDE_DIRECTORIES(oppo-r9-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(oppo-r9-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME oppo-r9-test COMMAND oppo-r9-test) + + ADD_EXECUTABLE(oppo-r15-test test/mock/oppo-r15.cc) + TARGET_INCLUDE_DIRECTORIES(oppo-r15-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(oppo-r15-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME oppo-r15-test COMMAND oppo-r15-test) + + ADD_EXECUTABLE(pixel-test test/mock/pixel.cc) + TARGET_INCLUDE_DIRECTORIES(pixel-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(pixel-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME pixel-test COMMAND pixel-test) + + ADD_EXECUTABLE(pixel-c-test test/mock/pixel-c.cc) + TARGET_INCLUDE_DIRECTORIES(pixel-c-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(pixel-c-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME pixel-c-test COMMAND pixel-c-test) + + ADD_EXECUTABLE(pixel-xl-test test/mock/pixel-xl.cc) + TARGET_INCLUDE_DIRECTORIES(pixel-xl-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(pixel-xl-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME pixel-xl-test COMMAND pixel-xl-test) + + ADD_EXECUTABLE(pixel-2-xl-test test/mock/pixel-2-xl.cc) + TARGET_INCLUDE_DIRECTORIES(pixel-2-xl-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(pixel-2-xl-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME pixel-2-xl-test COMMAND pixel-2-xl-test) + + ADD_EXECUTABLE(xiaomi-mi-5c-test test/mock/xiaomi-mi-5c.cc) + TARGET_INCLUDE_DIRECTORIES(xiaomi-mi-5c-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(xiaomi-mi-5c-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME xiaomi-mi-5c-test COMMAND xiaomi-mi-5c-test) + + ADD_EXECUTABLE(xiaomi-redmi-note-3-test test/mock/xiaomi-redmi-note-3.cc) + TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-3-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(xiaomi-redmi-note-3-test 
PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME xiaomi-redmi-note-3-test COMMAND xiaomi-redmi-note-3-test) + + ADD_EXECUTABLE(xiaomi-redmi-note-4-test test/mock/xiaomi-redmi-note-4.cc) + TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-4-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(xiaomi-redmi-note-4-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME xiaomi-redmi-note-4-test COMMAND xiaomi-redmi-note-4-test) + + ADD_EXECUTABLE(xperia-c4-dual-test test/mock/xperia-c4-dual.cc) + TARGET_INCLUDE_DIRECTORIES(xperia-c4-dual-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(xperia-c4-dual-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME xperia-c4-dual-test COMMAND xperia-c4-dual-test) + ENDIF() + + IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64)$") + ADD_EXECUTABLE(alldocube-iwork8-test test/mock/alldocube-iwork8.cc) + TARGET_INCLUDE_DIRECTORIES(alldocube-iwork8-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(alldocube-iwork8-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME alldocube-iwork8-test COMMAND alldocube-iwork8-test) + + ADD_EXECUTABLE(leagoo-t5c-test test/mock/leagoo-t5c.cc) + TARGET_INCLUDE_DIRECTORIES(leagoo-t5c-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(leagoo-t5c-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME leagoo-t5c-test COMMAND leagoo-t5c-test) + + ADD_EXECUTABLE(memo-pad-7-test test/mock/memo-pad-7.cc) + TARGET_INCLUDE_DIRECTORIES(memo-pad-7-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(memo-pad-7-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME memo-pad-7-test COMMAND memo-pad-7-test) + + ADD_EXECUTABLE(zenfone-c-test test/mock/zenfone-c.cc) + TARGET_INCLUDE_DIRECTORIES(zenfone-c-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(zenfone-c-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME zenfone-c-test COMMAND zenfone-c-test) + + ADD_EXECUTABLE(zenfone-2-test test/mock/zenfone-2.cc) + TARGET_INCLUDE_DIRECTORIES(zenfone-2-test BEFORE PRIVATE test/mock) + 
TARGET_LINK_LIBRARIES(zenfone-2-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME zenfone-2-test COMMAND zenfone-2-test) + + ADD_EXECUTABLE(zenfone-2e-test test/mock/zenfone-2e.cc) + TARGET_INCLUDE_DIRECTORIES(zenfone-2e-test BEFORE PRIVATE test/mock) + TARGET_LINK_LIBRARIES(zenfone-2e-test PRIVATE cpuinfo_mock gtest) + ADD_TEST(NAME zenfone-2e-test COMMAND zenfone-2e-test) + ENDIF() +ENDIF() + +# ---[ cpuinfo unit tests +IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_UNIT_TESTS) + ADD_EXECUTABLE(init-test test/init.cc) + CPUINFO_TARGET_ENABLE_CXX11(init-test) + CPUINFO_TARGET_RUNTIME_LIBRARY(init-test) + TARGET_LINK_LIBRARIES(init-test PRIVATE cpuinfo gtest gtest_main) + ADD_TEST(NAME init-test COMMAND init-test) + + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + ADD_EXECUTABLE(get-current-test test/get-current.cc) + CPUINFO_TARGET_ENABLE_CXX11(get-current-test) + CPUINFO_TARGET_RUNTIME_LIBRARY(get-current-test) + TARGET_LINK_LIBRARIES(get-current-test PRIVATE cpuinfo gtest gtest_main) + ADD_TEST(NAME get-current-test COMMAND get-current-test) + ENDIF() + + IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$") + ADD_EXECUTABLE(brand-string-test test/name/brand-string.cc) + CPUINFO_TARGET_ENABLE_CXX11(brand-string-test) + CPUINFO_TARGET_RUNTIME_LIBRARY(brand-string-test) + TARGET_LINK_LIBRARIES(brand-string-test PRIVATE cpuinfo_internals gtest gtest_main) + ADD_TEST(NAME brand-string-test COMMAND brand-string-test) + ENDIF() + + IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") + ADD_LIBRARY(android_properties_interface STATIC test/name/android-properties-interface.c) + CPUINFO_TARGET_ENABLE_C99(android_properties_interface) + CPUINFO_TARGET_RUNTIME_LIBRARY(android_properties_interface) + TARGET_LINK_LIBRARIES(android_properties_interface PRIVATE cpuinfo_internals) + + ADD_EXECUTABLE(chipset-test + test/name/proc-cpuinfo-hardware.cc + test/name/ro-product-board.cc + 
test/name/ro-board-platform.cc + test/name/ro-mediatek-platform.cc + test/name/ro-arch.cc + test/name/ro-chipname.cc + test/name/android-properties.cc) + CPUINFO_TARGET_ENABLE_CXX11(chipset-test) + CPUINFO_TARGET_RUNTIME_LIBRARY(chipset-test) + TARGET_LINK_LIBRARIES(chipset-test PRIVATE android_properties_interface gtest gtest_main) + ADD_TEST(NAME chipset-test COMMAND chipset-test) + + ADD_EXECUTABLE(cache-test test/arm-cache.cc) + CPUINFO_TARGET_ENABLE_CXX11(cache-test) + CPUINFO_TARGET_RUNTIME_LIBRARY(cache-test) + TARGET_COMPILE_DEFINITIONS(cache-test PRIVATE __STDC_LIMIT_MACROS=1 __STDC_CONSTANT_MACROS=1) + TARGET_LINK_LIBRARIES(cache-test PRIVATE cpuinfo_internals gtest gtest_main) + ADD_TEST(NAME cache-test COMMAND cache-test) + ENDIF() +ENDIF() + +# ---[ Helper and debug tools +IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_TOOLS) + ADD_EXECUTABLE(isa-info tools/isa-info.c) + CPUINFO_TARGET_ENABLE_C99(isa-info) + CPUINFO_TARGET_RUNTIME_LIBRARY(isa-info) + TARGET_LINK_LIBRARIES(isa-info PRIVATE cpuinfo) + INSTALL(TARGETS isa-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + + ADD_EXECUTABLE(cpu-info tools/cpu-info.c) + CPUINFO_TARGET_ENABLE_C99(cpu-info) + CPUINFO_TARGET_RUNTIME_LIBRARY(cpu-info) + TARGET_LINK_LIBRARIES(cpu-info PRIVATE cpuinfo) + INSTALL(TARGETS cpu-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + + ADD_EXECUTABLE(cache-info tools/cache-info.c) + CPUINFO_TARGET_ENABLE_C99(cache-info) + CPUINFO_TARGET_RUNTIME_LIBRARY(cache-info) + TARGET_LINK_LIBRARIES(cache-info PRIVATE cpuinfo) + INSTALL(TARGETS cache-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + + IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") + ADD_EXECUTABLE(auxv-dump tools/auxv-dump.c) + CPUINFO_TARGET_ENABLE_C99(auxv-dump) + CPUINFO_TARGET_RUNTIME_LIBRARY(auxv-dump) + TARGET_LINK_LIBRARIES(auxv-dump PRIVATE ${CMAKE_DL_LIBS} cpuinfo) + + ADD_EXECUTABLE(cpuinfo-dump tools/cpuinfo-dump.c) + 
CPUINFO_TARGET_ENABLE_C99(cpuinfo-dump) + CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo-dump) + ENDIF() + + IF(CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$") + ADD_EXECUTABLE(cpuid-dump tools/cpuid-dump.c) + CPUINFO_TARGET_ENABLE_C99(cpuid-dump) + CPUINFO_TARGET_RUNTIME_LIBRARY(cpuid-dump) + TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE src) + TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE include) + INSTALL(TARGETS cpuid-dump RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + ENDIF() +ENDIF() + +# ---[ pkg-config manifest. This is mostly from JsonCpp... +IF(CPUINFO_BUILD_PKG_CONFIG) + + FUNCTION(JOIN_PATHS joined_path first_path_segment) + SET(temp_path "${first_path_segment}") + FOREACH(current_segment IN LISTS ARGN) + IF(NOT ("${current_segment}" STREQUAL "")) + IF(IS_ABSOLUTE "${current_segment}") + SET(temp_path "${current_segment}") + ELSE() + SET(temp_path "${temp_path}/${current_segment}") + ENDIF() + ENDIF() + ENDFOREACH() + SET(${joined_path} "${temp_path}" PARENT_SCOPE) + ENDFUNCTION() + + JOIN_PATHS(libdir_for_pc_file "\${exec_prefix}" "${CMAKE_INSTALL_LIBDIR}") + JOIN_PATHS(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") + + CONFIGURE_FILE( + "libcpuinfo.pc.in" + "libcpuinfo.pc" + @ONLY) + + INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/libcpuinfo.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") + ENDIF() -TARGET_LINK_LIBRARIES(cpuinfo PRIVATE clog) -TARGET_LINK_LIBRARIES(cpuinfo_internals PRIVATE clog) \ No newline at end of file diff --git a/3rdparty/cpuinfo/README.md b/3rdparty/cpuinfo/README.md index 0eb71a5d6c6bd..82cadea643c98 100644 --- a/3rdparty/cpuinfo/README.md +++ b/3rdparty/cpuinfo/README.md @@ -48,7 +48,7 @@ Detect if target is a 32-bit or 64-bit ARM system: #endif ``` -Check if the host CPU support ARM NEON +Check if the host CPU supports ARM NEON ```c cpuinfo_initialize(); @@ -151,6 +151,36 @@ executable( ) ``` +### Bazel + +This project can be built using [Bazel](https://bazel.build/install). 
+ +You can also use this library as a dependency to your Bazel project. Add to the `WORKSPACE` file: + +```python +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") + +git_repository( + name = "org_pytorch_cpuinfo", + branch = "master", + remote = "https://github.com/Vertexwahn/cpuinfo.git", +) +``` + +And to your `BUILD` file: + +```python +cc_binary( + name = "cpuinfo_test", + srcs = [ + # ... + ], + deps = [ + "@org_pytorch_cpuinfo//:cpuinfo", + ], +) +``` + ### CMake To use with CMake use the [FindPkgConfig](https://cmake.org/cmake/help/latest/module/FindPkgConfig.html) module. Here is an example: @@ -220,12 +250,14 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo) - [x] x86-64 (iPhone simulator) - [x] ARMv7 - [x] ARM64 -- [x] OS X +- [x] macOS - [x] x86 - [x] x86-64 + - [x] ARM64 (Apple silicon) - [x] Windows - [x] x86 - [x] x86-64 + - [x] arm64 ## Methods @@ -234,12 +266,13 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo) - [x] Using `/proc/cpuinfo` on ARM - [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android) - [ ] Using kernel log (`dmesg`) on ARM Linux + - [x] Using Windows registry on ARM64 Windows - Vendor and microarchitecture detection - [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill) - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2) - [ ] VIA-designed x86/x86-64 cores - [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise) - - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1) + - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/V1/N2/V2) - [x] Qualcomm-designed ARM cores (Scorpion, Krait, and Kryo) - [x] Nvidia-designed ARM cores (Denver and Carmel) - [x] Samsung-designed ARM cores (Exynos) @@ -256,6 +289,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo) - [x] Using `/proc/self/auxv` (Android/ARM) - [ ] Using instruction probing on ARM (Linux) - [ ] Using CPUID 
registers on ARM64 (Linux) + - [x] Using IsProcessorFeaturePresent on ARM64 Windows - Cache detection - [x] Using CPUID leaf 0x00000002 (x86/x86-64) - [x] Using CPUID leaf 0x00000004 (non-AMD x86/x86-64) @@ -267,6 +301,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo) - [x] Using `sysctlbyname` (Mach) - [x] Using sysfs `topology` directories (ARM/Linux) - [ ] Using sysfs `cache` directories (Linux) + - [x] Using `GetLogicalProcessorInformationEx` on ARM64 Windows - TLB detection - [x] Using CPUID leaf 0x00000002 (x86/x86-64) - [ ] Using CPUID leaves 0x80000005-0x80000006 and 0x80000019 (AMD x86/x86-64) diff --git a/3rdparty/cpuinfo/cmake/DownloadGoogleBenchmark.cmake b/3rdparty/cpuinfo/cmake/DownloadGoogleBenchmark.cmake new file mode 100644 index 0000000000000..fd99d0ef818c1 --- /dev/null +++ b/3rdparty/cpuinfo/cmake/DownloadGoogleBenchmark.cmake @@ -0,0 +1,15 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) + +PROJECT(googlebenchmark-download NONE) + +INCLUDE(ExternalProject) +ExternalProject_Add(googlebenchmark + URL https://github.com/google/benchmark/archive/v1.6.1.zip + URL_HASH SHA256=367e963b8620080aff8c831e24751852cffd1f74ea40f25d9cc1b667a9dd5e45 + SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" + BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/3rdparty/cpuinfo/cmake/DownloadGoogleTest.cmake b/3rdparty/cpuinfo/cmake/DownloadGoogleTest.cmake new file mode 100644 index 0000000000000..c58fd0985c25b --- /dev/null +++ b/3rdparty/cpuinfo/cmake/DownloadGoogleTest.cmake @@ -0,0 +1,15 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) + +PROJECT(googletest-download NONE) + +INCLUDE(ExternalProject) +ExternalProject_Add(googletest + URL https://github.com/google/googletest/archive/release-1.11.0.zip + URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a + SOURCE_DIR 
"${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" + BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/3rdparty/cpuinfo/cmake/cpuinfo-config.cmake.in b/3rdparty/cpuinfo/cmake/cpuinfo-config.cmake.in new file mode 100644 index 0000000000000..fd52c8ca95c4c --- /dev/null +++ b/3rdparty/cpuinfo/cmake/cpuinfo-config.cmake.in @@ -0,0 +1,12 @@ +@PACKAGE_INIT@ + +get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +file(GLOB CONFIG_FILES "${_DIR}/cpuinfo-config-*.cmake") +foreach(f ${CONFIG_FILES}) + include(${f}) +endforeach() + +# ${_DIR}/cpuinfo-targets-*.cmake will be included here +include("${_DIR}/cpuinfo-targets.cmake") + +check_required_components(@PROJECT_NAME@) diff --git a/3rdparty/cpuinfo/cpuinfo.vcxproj b/3rdparty/cpuinfo/cpuinfo.vcxproj index dab240192fe86..623db781dd6b8 100644 --- a/3rdparty/cpuinfo/cpuinfo.vcxproj +++ b/3rdparty/cpuinfo/cpuinfo.vcxproj @@ -59,7 +59,7 @@ - %(PreprocessorDefinitions) + CPUINFO_LOG_LEVEL=0;%(PreprocessorDefinitions) TurnOffAllWarnings $(ProjectDir)include;$(ProjectDir)src;$(ProjectDir)deps\clog\include;%(AdditionalIncludeDirectories) $(IntDir)%(RelativeDir) diff --git a/3rdparty/cpuinfo/deps/clog/CMakeLists.txt b/3rdparty/cpuinfo/deps/clog/CMakeLists.txt index 0e65a1b8bdcd3..6e50c41c38a2b 100644 --- a/3rdparty/cpuinfo/deps/clog/CMakeLists.txt +++ b/3rdparty/cpuinfo/deps/clog/CMakeLists.txt @@ -1,5 +1,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.1 FATAL_ERROR) +INCLUDE(GNUInstallDirs) + # ---[ Project and semantic versioning. 
PROJECT(clog C CXX) @@ -11,6 +13,14 @@ IF(ANDROID) ELSE() OPTION(CLOG_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" ON) ENDIF() +OPTION(CLOG_BUILD_TESTS "Build clog tests" ON) +OPTION(USE_SYSTEM_LIBS "Use system libraries instead of downloading and building them" OFF) +OPTION(USE_SYSTEM_GOOGLETEST "Use system Google Test library instead of downloading and building it" ${USE_SYSTEM_LIBS}) + +# ---[ CMake options +IF(CLOG_BUILD_TESTS) + ENABLE_TESTING() +ENDIF() MACRO(CLOG_TARGET_RUNTIME_LIBRARY target) IF(MSVC AND NOT CLOG_RUNTIME_TYPE STREQUAL "default") @@ -24,6 +34,26 @@ MACRO(CLOG_TARGET_RUNTIME_LIBRARY target) ENDIF() ENDMACRO() +# ---[ Download deps +SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps + CACHE PATH "Confu-style dependencies source directory") +SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps + CACHE PATH "Confu-style dependencies binary directory") + +IF(CLOG_BUILD_TESTS) + IF(USE_SYSTEM_GOOGLETEST) + FIND_PACKAGE(GTest REQUIRED) + ELSEIF(NOT DEFINED GOOGLETEST_SOURCE_DIR) + MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)") + CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt") + EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . + WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") + EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build . 
+ WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") + SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory") + ENDIF() +ENDIF() + # ---[ clog library ADD_LIBRARY(clog STATIC src/clog.c) SET_TARGET_PROPERTIES(clog PROPERTIES @@ -31,7 +61,7 @@ SET_TARGET_PROPERTIES(clog PROPERTIES C_EXTENSIONS NO) CLOG_TARGET_RUNTIME_LIBRARY(clog) SET_TARGET_PROPERTIES(clog PROPERTIES PUBLIC_HEADER include/clog.h) -TARGET_INCLUDE_DIRECTORIES(clog BEFORE PUBLIC include) +TARGET_INCLUDE_DIRECTORIES(clog PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:include>) IF(CLOG_LOG_TO_STDIO) TARGET_COMPILE_DEFINITIONS(clog PRIVATE CLOG_LOG_TO_STDIO=1) ELSE() @@ -39,4 +69,33 @@ ELSE() ENDIF() IF(ANDROID AND NOT CLOG_LOG_TO_STDIO) TARGET_LINK_LIBRARIES(clog PRIVATE log) -ENDIF() \ No newline at end of file +ENDIF() + +ADD_LIBRARY(cpuinfo::clog ALIAS clog) + +INSTALL(TARGETS clog + EXPORT cpuinfo-targets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + +# ---[ clog tests +IF(CLOG_BUILD_TESTS) + # ---[ Build google test + IF(NOT TARGET gtest AND NOT USE_SYSTEM_GOOGLETEST) + IF(MSVC AND NOT CLOG_RUNTIME_TYPE STREQUAL "static") + SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + ENDIF() + ADD_SUBDIRECTORY( + "${GOOGLETEST_SOURCE_DIR}" + "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest") + ENDIF() + + ADD_EXECUTABLE(clog-test test/clog.cc) + SET_TARGET_PROPERTIES(clog-test PROPERTIES + CXX_STANDARD 11 + CXX_EXTENSIONS NO) + CLOG_TARGET_RUNTIME_LIBRARY(clog-test) + TARGET_LINK_LIBRARIES(clog-test PRIVATE clog gtest gtest_main) + ADD_TEST(NAME clog-test COMMAND clog-test) +ENDIF() diff --git a/3rdparty/cpuinfo/deps/clog/src/clog.c b/3rdparty/cpuinfo/deps/clog/src/clog.c index fe5d43e1f6249..27658f92ca4a5 100644 --- a/3rdparty/cpuinfo/deps/clog/src/clog.c +++ b/3rdparty/cpuinfo/deps/clog/src/clog.c @@ -10,6 +10,9 @@ #ifdef __ANDROID__ #include #endif +#ifdef __hexagon__ + 
#include +#endif #ifndef CLOG_LOG_TO_STDIO #ifdef __ANDROID__ @@ -102,12 +105,14 @@ void clog_vlog_fatal(const char* module, const char* format, va_list args) { out_buffer = heap_buffer; } out_buffer[prefix_chars + format_chars] = '\n'; - #ifdef _WIN32 + #if defined(_WIN32) DWORD bytes_written; WriteFile( GetStdHandle(STD_ERROR_HANDLE), out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, &bytes_written, NULL); + #elif defined(__hexagon__) + qurt_printf("%s", out_buffer); #else write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); #endif @@ -178,12 +183,14 @@ void clog_vlog_error(const char* module, const char* format, va_list args) { out_buffer = heap_buffer; } out_buffer[prefix_chars + format_chars] = '\n'; - #ifdef _WIN32 + #if defined(_WIN32) DWORD bytes_written; WriteFile( GetStdHandle(STD_ERROR_HANDLE), out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, &bytes_written, NULL); + #elif defined(__hexagon__) + qurt_printf("%s", out_buffer); #else write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); #endif @@ -254,12 +261,14 @@ void clog_vlog_warning(const char* module, const char* format, va_list args) { out_buffer = heap_buffer; } out_buffer[prefix_chars + format_chars] = '\n'; - #ifdef _WIN32 + #if defined(_WIN32) DWORD bytes_written; WriteFile( GetStdHandle(STD_ERROR_HANDLE), out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, &bytes_written, NULL); + #elif defined(__hexagon__) + qurt_printf("%s", out_buffer); #else write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); #endif @@ -330,12 +339,14 @@ void clog_vlog_info(const char* module, const char* format, va_list args) { out_buffer = heap_buffer; } out_buffer[prefix_chars + format_chars] = '\n'; - #ifdef _WIN32 + #if defined(_WIN32) DWORD bytes_written; WriteFile( GetStdHandle(STD_OUTPUT_HANDLE), out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, &bytes_written, NULL); + #elif 
defined(__hexagon__) + qurt_printf("%s", out_buffer); #else write(STDOUT_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); #endif @@ -406,12 +417,14 @@ void clog_vlog_debug(const char* module, const char* format, va_list args) { out_buffer = heap_buffer; } out_buffer[prefix_chars + format_chars] = '\n'; - #ifdef _WIN32 + #if defined(_WIN32) DWORD bytes_written; WriteFile( GetStdHandle(STD_OUTPUT_HANDLE), out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, &bytes_written, NULL); + #elif defined(__hexagon__) + qurt_printf("%s", out_buffer); #else write(STDOUT_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); #endif diff --git a/3rdparty/cpuinfo/include/cpuinfo.h b/3rdparty/cpuinfo/include/cpuinfo.h index cffa299ede3e1..3fbcad2a08f31 100644 --- a/3rdparty/cpuinfo/include/cpuinfo.h +++ b/3rdparty/cpuinfo/include/cpuinfo.h @@ -46,6 +46,14 @@ #endif #endif +#if defined(__riscv) + #if (__riscv_xlen == 32) + #define CPUINFO_ARCH_RISCV32 1 + #elif (__riscv_xlen == 64) + #define CPUINFO_ARCH_RISCV64 1 + #endif +#endif + /* Define other architecture-specific macros as 0 */ #ifndef CPUINFO_ARCH_X86 @@ -80,6 +88,14 @@ #define CPUINFO_ARCH_WASMSIMD 0 #endif +#ifndef CPUINFO_ARCH_RISCV32 + #define CPUINFO_ARCH_RISCV32 0 +#endif + +#ifndef CPUINFO_ARCH_RISCV64 + #define CPUINFO_ARCH_RISCV64 0 +#endif + #if CPUINFO_ARCH_X86 && defined(_MSC_VER) #define CPUINFO_ABI __cdecl #elif CPUINFO_ARCH_X86 && defined(__GNUC__) @@ -188,6 +204,8 @@ enum cpuinfo_vendor { * Processors are variants of AMD cores. */ cpuinfo_vendor_hygon = 16, + /** SiFive, Inc. Vendor of RISC-V processor microarchitectures. */ + cpuinfo_vendor_sifive = 17, /* Active vendors of embedded CPUs */ @@ -363,6 +381,8 @@ enum cpuinfo_uarch { cpuinfo_uarch_zen2 = 0x0020010A, /** AMD Zen 3 microarchitecture. */ cpuinfo_uarch_zen3 = 0x0020010B, + /** AMD Zen 4 microarchitecture. */ + cpuinfo_uarch_zen4 = 0x0020010C, /** NSC Geode and AMD Geode GX and LX. 
*/ cpuinfo_uarch_geode = 0x00200200, @@ -426,9 +446,26 @@ enum cpuinfo_uarch { cpuinfo_uarch_neoverse_n1 = 0x00300400, /** ARM Neoverse E1. */ cpuinfo_uarch_neoverse_e1 = 0x00300401, + /** ARM Neoverse V1. */ + cpuinfo_uarch_neoverse_v1 = 0x00300402, + /** ARM Neoverse N2. */ + cpuinfo_uarch_neoverse_n2 = 0x00300403, + /** ARM Neoverse V2. */ + cpuinfo_uarch_neoverse_v2 = 0x00300404, /** ARM Cortex-X1. */ - cpuinfo_uarch_cortex_x1 = 0x00300500, + cpuinfo_uarch_cortex_x1 = 0x00300501, + /** ARM Cortex-X2. */ + cpuinfo_uarch_cortex_x2 = 0x00300502, + /** ARM Cortex-X3. */ + cpuinfo_uarch_cortex_x3 = 0x00300503, + + /** ARM Cortex-A510. */ + cpuinfo_uarch_cortex_a510 = 0x00300551, + /** ARM Cortex-A710. */ + cpuinfo_uarch_cortex_a710 = 0x00300571, + /** ARM Cortex-A715. */ + cpuinfo_uarch_cortex_a715 = 0x00300572, /** Qualcomm Scorpion. */ cpuinfo_uarch_scorpion = 0x00400100, @@ -489,10 +526,14 @@ enum cpuinfo_uarch { cpuinfo_uarch_lightning = 0x00700109, /** Apple A13 processor (little cores). */ cpuinfo_uarch_thunder = 0x0070010A, - /** Apple M1 processor (big cores). */ + /** Apple A14 / M1 processor (big cores). */ cpuinfo_uarch_firestorm = 0x0070010B, - /** Apple M1 processor (little cores). */ + /** Apple A14 / M1 processor (little cores). */ cpuinfo_uarch_icestorm = 0x0070010C, + /** Apple A15 / M2 processor (big cores). */ + cpuinfo_uarch_avalanche = 0x0070010D, + /** Apple A15 / M2 processor (little cores). */ + cpuinfo_uarch_blizzard = 0x0070010E, /** Cavium ThunderX. 
*/ cpuinfo_uarch_thunderx = 0x00800100, @@ -706,6 +747,7 @@ void CPUINFO_ABI cpuinfo_deinitialize(void); bool sse4a; bool misaligned_sse; bool avx; + bool avxvnni; bool fma3; bool fma4; bool xop; @@ -725,6 +767,7 @@ void CPUINFO_ABI cpuinfo_deinitialize(void); bool avx512vpopcntdq; bool avx512vnni; bool avx512bf16; + bool avx512fp16; bool avx512vp2intersect; bool avx512_4vnniw; bool avx512_4fmaps; @@ -1052,6 +1095,14 @@ static inline bool cpuinfo_has_x86_avx(void) { #endif } +static inline bool cpuinfo_has_x86_avxvnni(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avxvnni; + #else + return false; + #endif +} + static inline bool cpuinfo_has_x86_fma3(void) { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 return cpuinfo_isa.fma3; @@ -1204,6 +1255,14 @@ static inline bool cpuinfo_has_x86_avx512bf16(void) { #endif } +static inline bool cpuinfo_has_x86_avx512fp16(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512fp16; + #else + return false; + #endif +} + static inline bool cpuinfo_has_x86_avx512vp2intersect(void) { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 return cpuinfo_isa.avx512vp2intersect; @@ -1460,14 +1519,17 @@ static inline bool cpuinfo_has_x86_sha(void) { #endif #if CPUINFO_ARCH_ARM64 bool atomics; + bool bf16; bool sve; bool sve2; + bool i8mm; #endif bool rdm; bool fp16arith; bool dot; bool jscvt; bool fcma; + bool fhm; bool aes; bool sha1; @@ -1623,6 +1685,22 @@ static inline bool cpuinfo_has_arm_vfpv4_d32(void) { #endif } +static inline bool cpuinfo_has_arm_fp16_arith(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_bf16(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; + #else + return false; + #endif +} + static inline bool cpuinfo_has_arm_wmmx(void) { #if CPUINFO_ARCH_ARM return cpuinfo_isa.wmmx; @@ -1705,9 +1783,9 @@ static inline bool cpuinfo_has_arm_neon_fp16_arith(void) { 
#endif } -static inline bool cpuinfo_has_arm_fp16_arith(void) { +static inline bool cpuinfo_has_arm_fhm(void) { #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 - return cpuinfo_isa.fp16arith; + return cpuinfo_isa.fhm; #else return false; #endif @@ -1721,6 +1799,14 @@ static inline bool cpuinfo_has_arm_neon_dot(void) { #endif } +static inline bool cpuinfo_has_arm_neon_bf16(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; + #else + return false; + #endif +} + static inline bool cpuinfo_has_arm_jscvt(void) { #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 return cpuinfo_isa.jscvt; @@ -1737,6 +1823,14 @@ static inline bool cpuinfo_has_arm_fcma(void) { #endif } +static inline bool cpuinfo_has_arm_i8mm(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.i8mm; + #else + return false; + #endif +} + static inline bool cpuinfo_has_arm_aes(void) { #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 return cpuinfo_isa.aes; @@ -1785,6 +1879,14 @@ static inline bool cpuinfo_has_arm_sve(void) { #endif } +static inline bool cpuinfo_has_arm_sve_bf16(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve && cpuinfo_isa.bf16; + #else + return false; + #endif +} + static inline bool cpuinfo_has_arm_sve2(void) { #if CPUINFO_ARCH_ARM64 return cpuinfo_isa.sve2; @@ -1793,6 +1895,109 @@ static inline bool cpuinfo_has_arm_sve2(void) { #endif } +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + /* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions instead. */ + struct cpuinfo_riscv_isa { + /** + * Keep fields in line with the canonical order as defined by + * Section 27.11 Subset Naming Convention. + */ + /* RV32I/64I/128I Base ISA. */ + bool i; + #if CPUINFO_ARCH_RISCV32 + /* RV32E Base ISA. */ + bool e; + #endif + /* Integer Multiply/Divide Extension. */ + bool m; + /* Atomic Extension. */ + bool a; + /* Single-Precision Floating-Point Extension. */ + bool f; + /* Double-Precision Floating-Point Extension. */ + bool d; + /* Compressed Extension. 
*/ + bool c; + /* Vector Extension. */ + bool v; + }; + + extern struct cpuinfo_riscv_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_riscv_i(void) { + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.i; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_riscv_e(void) { + #if CPUINFO_ARCH_RISCV32 + return cpuinfo_isa.e; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_riscv_m(void) { + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.m; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_riscv_a(void) { + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.a; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_riscv_f(void) { + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.f; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_riscv_d(void) { + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.d; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_riscv_g(void) { + // The 'G' extension is simply shorthand for 'IMAFD'. 
+ return cpuinfo_has_riscv_i() + && cpuinfo_has_riscv_m() + && cpuinfo_has_riscv_a() + && cpuinfo_has_riscv_f() + && cpuinfo_has_riscv_d(); +} + +static inline bool cpuinfo_has_riscv_c(void) { + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.c; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_riscv_v(void) { + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.v; + #else + return false; + #endif +} + const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); diff --git a/3rdparty/cpuinfo/src/api.c b/3rdparty/cpuinfo/src/api.c index f91b421ccfa01..2f70aeffbb069 100644 --- a/3rdparty/cpuinfo/src/api.c +++ b/3rdparty/cpuinfo/src/api.c @@ -30,7 +30,8 @@ uint32_t cpuinfo_packages_count = 0; uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 }; uint32_t cpuinfo_max_cache_size = 0; -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 \ + || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL; uint32_t cpuinfo_uarchs_count = 0; #else @@ -41,7 +42,8 @@ uint32_t cpuinfo_max_cache_size = 0; uint32_t cpuinfo_linux_cpu_max = 0; const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; - #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 \ + || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL; #endif #endif @@ -79,7 +81,8 @@ const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() { if (!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs"); } - #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 \ + || CPUINFO_ARCH_RISCV32 || 
CPUINFO_ARCH_RISCV64 return cpuinfo_uarchs; #else return &cpuinfo_global_uarch; @@ -130,7 +133,8 @@ const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) { if (!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch"); } - #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 \ + || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) { return NULL; } @@ -175,7 +179,8 @@ uint32_t cpuinfo_get_uarchs_count(void) { if (!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count"); } - #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 \ + || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 return cpuinfo_uarchs_count; #else return 1; @@ -351,7 +356,8 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index"); } - #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 \ + || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 #ifdef __linux__ if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { /* Special case: avoid syscall on systems with only a single type of cores */ @@ -373,7 +379,7 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { return 0; #endif #else - /* Only ARM/ARM64 processors may include cores of different types in the same package. */ + /* Only ARM/ARM64/RISCV processors may include cores of different types in the same package. 
*/ return 0; #endif } @@ -382,7 +388,8 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t defau if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default"); } - #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 \ + || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 #ifdef __linux__ if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { /* Special case: avoid syscall on systems with only a single type of cores */ @@ -404,7 +411,7 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t defau return default_uarch_index; #endif #else - /* Only ARM/ARM64 processors may include cores of different types in the same package. */ + /* Only ARM/ARM64/RISCV processors may include cores of different types in the same package. */ return 0; #endif } diff --git a/3rdparty/cpuinfo/src/arm/api.h b/3rdparty/cpuinfo/src/arm/api.h index 48b99ddc90069..469c84bd5c549 100644 --- a/3rdparty/cpuinfo/src/arm/api.h +++ b/3rdparty/cpuinfo/src/arm/api.h @@ -28,6 +28,7 @@ enum cpuinfo_arm_chipset_vendor { cpuinfo_arm_chipset_vendor_spreadtrum, cpuinfo_arm_chipset_vendor_telechips, cpuinfo_arm_chipset_vendor_texas_instruments, + cpuinfo_arm_chipset_vendor_unisoc, cpuinfo_arm_chipset_vendor_wondermedia, cpuinfo_arm_chipset_vendor_max, }; @@ -62,6 +63,7 @@ enum cpuinfo_arm_chipset_series { cpuinfo_arm_chipset_series_spreadtrum_sc, cpuinfo_arm_chipset_series_telechips_tcc, cpuinfo_arm_chipset_series_texas_instruments_omap, + cpuinfo_arm_chipset_series_unisoc_t, cpuinfo_arm_chipset_series_wondermedia_wm, cpuinfo_arm_chipset_series_max, }; diff --git a/3rdparty/cpuinfo/src/arm/cache.c b/3rdparty/cpuinfo/src/arm/cache.c index 446b02b9e2b17..953abb72f645a 100644 --- a/3rdparty/cpuinfo/src/arm/cache.c +++ b/3rdparty/cpuinfo/src/arm/cache.c @@ -1238,6 +1238,57 @@ void cpuinfo_arm_decode_cache( }; break; } + case 
cpuinfo_uarch_neoverse_n1: + case cpuinfo_uarch_neoverse_v1: + case cpuinfo_uarch_neoverse_n2: + case cpuinfo_uarch_neoverse_v2: + { + /* + * The specifications here below are taken from the + * Arm Core Technical Reference Manuals for + * - Neoverse N1: https://developer.arm.com/documentation/100616/0401/?lang=en + * - Neoverse N2: https://developer.arm.com/documentation/102099/0003/?lang=en + * - Neoverse V1: https://developer.arm.com/documentation/101427/0102/?lang=en + * - Neoverse V2: https://developer.arm.com/documentation/102375/0002/?lang=en + * + * All four Arm architectures have L1 memory system with instruction and data caches, + * both of fixed size of 64KB. The instruction side memory system is 4-way set associative + * with a cache line length of 64 bytes. The data cache is also 4-way set associative with + * a cache line length of 64 bytes. + * + * The L2 memory system differs across the four Architectures in the minimum + * length of the L2 cache. Namely: + * - Arm Neoverse N1/N2/V1 have a L2 cache of configurable size of 256KB, 512KB, or 1024KB + * - Arm Neoverse V2 has a L2 cache of configurable size of 1MB or 2MB + * For all four architectures, the L2 cache is 8-way set associative + * For all other information, please refer to the technical manuals linked above + */ + const uint32_t min_l2_size_KB = uarch == cpuinfo_uarch_neoverse_v2 ? 
1024 : 256; + const uint32_t min_l3_size_KB = 0; + + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = min_l2_size_KB * 1024, + .associativity = 8, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + *l3 = (struct cpuinfo_cache) { + .size = min_l3_size_KB * 1024, + .associativity = 16, + .line_size = 64, + }; + break; + } #if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) case cpuinfo_uarch_scorpion: /* @@ -1655,6 +1706,10 @@ uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* proc */ return 8 * 1024 * 1024; case cpuinfo_uarch_cortex_a55: + case cpuinfo_uarch_neoverse_n1: + case cpuinfo_uarch_neoverse_v1: + case cpuinfo_uarch_neoverse_n2: + case cpuinfo_uarch_neoverse_v2: case cpuinfo_uarch_cortex_a75: case cpuinfo_uarch_cortex_a76: case cpuinfo_uarch_exynos_m4: diff --git a/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c b/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c index df68aa147a478..65c7826fcec6e 100644 --- a/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c +++ b/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c @@ -33,6 +33,13 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( const struct cpuinfo_arm_chipset chipset[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]) { + if (architecture_version < 8) { + const uint32_t armv8_features2_mask = CPUINFO_ARM_LINUX_FEATURE2_AES | CPUINFO_ARM_LINUX_FEATURE2_PMULL | + CPUINFO_ARM_LINUX_FEATURE2_SHA1 | CPUINFO_ARM_LINUX_FEATURE2_SHA2 | CPUINFO_ARM_LINUX_FEATURE2_CRC32; + if (features2 & armv8_features2_mask) { + architecture_version = 8; + } + } if (architecture_version >= 8) { /* * ARMv7 code running on ARMv8: IDIV, VFP, NEON are always supported, @@ -57,13 +64,22 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( * NEON FP16 compute extension and VQRDMLAH/VQRDMLSH instructions 
are not indicated in /proc/cpuinfo. * Use a MIDR-based heuristic to whitelist processors known to support it: * - Processors with Cortex-A55 cores - * - Processors with Cortex-A65 cores * - Processors with Cortex-A75 cores * - Processors with Cortex-A76 cores * - Processors with Cortex-A77 cores + * - Processors with Cortex-A78 cores + * - Processors with Cortex-A510 cores + * - Processors with Cortex-A710 cores + * - Processors with Cortex-A715 cores + * - Processors with Cortex-X1 cores + * - Processors with Cortex-X2 cores + * - Processors with Cortex-X3 cores * - Processors with Exynos M4 cores * - Processors with Exynos M5 cores * - Neoverse N1 cores + * - Neoverse N2 cores + * - Neoverse V1 cores + * - Neoverse V2 cores */ if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { /* Only little cores of Exynos 9810 support FP16 & RDM */ @@ -71,11 +87,21 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( } else { switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { case UINT32_C(0x4100D050): /* Cortex-A55 */ - case UINT32_C(0x4100D060): /* Cortex-A65 */ + case UINT32_C(0x4100D0A0): /* Cortex-A75 */ case UINT32_C(0x4100D0B0): /* Cortex-A76 */ case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D410): /* Cortex-A78 */ + case UINT32_C(0x4100D440): /* Cortex-X1 */ + case UINT32_C(0x4100D460): /* Cortex-A510 */ + case UINT32_C(0x4100D470): /* Cortex-A710 */ + case UINT32_C(0x4100D480): /* Cortex-X2 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ + case UINT32_C(0x4100D4D0): /* Cortex-A715 */ + case UINT32_C(0x4100D4E0): /* Cortex-X3 */ + case UINT32_C(0x4100D4F0): /* Neoverse V2 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ case UINT32_C(0x51008030): /* Kryo 385 Silver 
(Cortex-A55) */ @@ -91,25 +117,57 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( /* * NEON VDOT instructions are not indicated in /proc/cpuinfo. - * Use a MIDR-based heuristic to whitelist processors known to support it. + * Use a MIDR-based heuristic to whitelist processors known to support it: + * - Processors with Cortex-A76 cores + * - Processors with Cortex-A77 cores + * - Processors with Cortex-A78 cores + * - Processors with Cortex-A510 cores + * - Processors with Cortex-A710 cores + * - Processors with Cortex-A715 cores + * - Processors with Cortex-X1 cores + * - Processors with Cortex-X2 cores + * - Processors with Cortex-X3 cores + * - Processors with Exynos M4 cores + * - Processors with Exynos M5 cores + * - Neoverse N1 cores + * - Neoverse N2 cores + * - Neoverse V1 cores + * - Neoverse V2 cores */ - switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { - case UINT32_C(0x4100D0B0): /* Cortex-A76 */ - case UINT32_C(0x4100D0D0): /* Cortex-A77 */ - case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ - case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ - case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ - case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ - case UINT32_C(0x53000030): /* Exynos-M4 */ - case UINT32_C(0x53000040): /* Exynos-M5 */ - isa->dot = true; - break; - case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */ - isa->dot = !!(midr_get_variant(midr) >= 1); - break; - case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */ - isa->dot = !!(midr_get_variant(midr) >= 2); - break; + if (chipset->series == cpuinfo_arm_chipset_series_spreadtrum_sc && chipset->model == 9863) { + cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Spreadtrum SC9863A"); + } else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_t && chipset->model == 310) { + cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc T310"); + 
} else { + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0C0): /* Neoverse N1 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D410): /* Cortex-A78 */ + case UINT32_C(0x4100D440): /* Cortex-X1 */ + case UINT32_C(0x4100D460): /* Cortex-A510 */ + case UINT32_C(0x4100D470): /* Cortex-A710 */ + case UINT32_C(0x4100D480): /* Cortex-X2 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ + case UINT32_C(0x4100D4D0): /* Cortex-A715 */ + case UINT32_C(0x4100D4E0): /* Cortex-X3 */ + case UINT32_C(0x4100D4F0): /* Neoverse V2 */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ + case UINT32_C(0x53000030): /* Exynos M4 */ + case UINT32_C(0x53000040): /* Exynos M5 */ + isa->dot = true; + break; + case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 1); + break; + case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 2); + break; + } } } else { /* ARMv7 or lower: use feature flags to detect optional features */ @@ -167,19 +225,24 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( } if (features & CPUINFO_ARM_LINUX_FEATURE_IWMMXT) { - const uint32_t wcid = read_wcid(); - cpuinfo_log_debug("WCID = 0x%08"PRIx32, wcid); - const uint32_t coprocessor_type = (wcid >> 8) & UINT32_C(0xFF); - if (coprocessor_type >= 0x10) { - isa->wmmx = true; - if (coprocessor_type >= 0x20) { - isa->wmmx2 = true; + #if !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) + const uint32_t wcid = read_wcid(); + cpuinfo_log_debug("WCID = 0x%08"PRIx32, wcid); + const uint32_t coprocessor_type = (wcid >> 8) & 
UINT32_C(0xFF); + if (coprocessor_type >= 0x10) { + isa->wmmx = true; + if (coprocessor_type >= 0x20) { + isa->wmmx2 = true; + } + } else { + cpuinfo_log_warning("WMMX ISA disabled: OS reported iwmmxt feature, " + "but WCID coprocessor type 0x%"PRIx32" indicates no WMMX support", + coprocessor_type); } - } else { + #else cpuinfo_log_warning("WMMX ISA disabled: OS reported iwmmxt feature, " - "but WCID coprocessor type 0x%"PRIx32" indicates no WMMX support", - coprocessor_type); - } + "but there is no iWMMXt coprocessor"); + #endif } if ((features & CPUINFO_ARM_LINUX_FEATURE_THUMB) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_T)) { diff --git a/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c b/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c index 2000e1a156721..5dd4c4d07ae14 100644 --- a/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c +++ b/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c @@ -41,6 +41,9 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( * - Processors with Exynos M4 cores * - Processors with Exynos M5 cores * - Neoverse N1 cores + * - Neoverse V1 cores + * - Neoverse N2 cores + * - Neoverse V2 cores */ if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { /* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */ @@ -50,10 +53,14 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { case UINT32_C(0x4100D050): /* Cortex-A55 */ case UINT32_C(0x4100D060): /* Cortex-A65 */ + case UINT32_C(0x4100D0A0): /* Cortex-A75 */ case UINT32_C(0x4100D0B0): /* Cortex-A76 */ case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ + case UINT32_C(0x4100D4F0): /* Neoverse V2 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case 
UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */ @@ -78,6 +85,9 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( break; } } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_I8MM) { + isa->i8mm = true; + } /* * Many phones ship with an old kernel configuration that doesn't report UDOT/SDOT instructions. @@ -89,7 +99,10 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D400): /* Neoverse V1 */ + case UINT32_C(0x4100D490): /* Neoverse N2 */ case UINT32_C(0x4100D4A0): /* Neoverse E1 */ + case UINT32_C(0x4100D4F0): /* Neoverse V2 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ @@ -124,4 +137,13 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) { isa->sve2 = true; } + // SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16 feature flag + // was added in Linux kernel before the BF16 feature flag, so we check for either. 
+ if (features2 & (CPUINFO_ARM_LINUX_FEATURE2_BF16 | CPUINFO_ARM_LINUX_FEATURE2_SVEBF16)) { + isa->bf16 = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM) { + isa->fhm = true; + } } + diff --git a/3rdparty/cpuinfo/src/arm/linux/api.h b/3rdparty/cpuinfo/src/arm/linux/api.h index 1c09f827e3436..2e849431d3f5a 100644 --- a/3rdparty/cpuinfo/src/arm/linux/api.h +++ b/3rdparty/cpuinfo/src/arm/linux/api.h @@ -314,7 +314,7 @@ CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo( struct cpuinfo_arm_isa isa[restrict static 1]); #endif -#ifdef __ANDROID__ +#if defined(__ANDROID__) CPUINFO_INTERNAL struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset( const struct cpuinfo_android_properties properties[restrict static 1], diff --git a/3rdparty/cpuinfo/src/arm/linux/chipset.c b/3rdparty/cpuinfo/src/arm/linux/chipset.c index e36283c55fa51..0e9191fd9c1f8 100644 --- a/3rdparty/cpuinfo/src/arm/linux/chipset.c +++ b/3rdparty/cpuinfo/src/arm/linux/chipset.c @@ -37,29 +37,17 @@ static inline bool is_ascii_numeric(char c) { } static inline uint16_t load_u16le(const void* ptr) { -#if defined(__ARM_ARCH_7A__) || defined(__aarch64__) - return *((const uint16_t*) ptr); -#else const uint8_t* byte_ptr = (const uint8_t*) ptr; return ((uint16_t) byte_ptr[1] << 8) | (uint16_t) byte_ptr[0]; -#endif } static inline uint32_t load_u24le(const void* ptr) { -#if defined(__ARM_ARCH_7A__) || defined(__aarch64__) - return ((uint32_t) ((const uint8_t*) ptr)[2] << 16) | ((uint32_t) *((const uint16_t*) ptr)); -#else - const uint8_t* byte_ptr = (const uint8_t*) ptr; - return ((uint32_t) byte_ptr[2] << 16) | ((uint32_t) byte_ptr[1] << 8) | (uint32_t) byte_ptr[0]; -#endif + return ((uint32_t) ((const uint8_t*) ptr)[2] << 16) | (uint32_t) load_u16le(ptr); } static inline uint32_t load_u32le(const void* ptr) { -#if defined(__ARM_ARCH_7A__) || defined(__aarch64__) - return *((const uint32_t*) ptr); -#else - return ((uint32_t) ((const uint8_t*) ptr)[3] << 24) | load_u24le(ptr); -#endif 
+ const uint8_t* byte_ptr = (const uint8_t*) ptr; + return ((uint32_t) byte_ptr[3] << 24) | ((uint32_t) byte_ptr[2] << 16) | ((uint32_t) byte_ptr[1] << 8) | (uint32_t) byte_ptr[0]; } /* @@ -96,6 +84,7 @@ static enum cpuinfo_arm_chipset_vendor chipset_series_vendor[cpuinfo_arm_chipset [cpuinfo_arm_chipset_series_spreadtrum_sc] = cpuinfo_arm_chipset_vendor_spreadtrum, [cpuinfo_arm_chipset_series_telechips_tcc] = cpuinfo_arm_chipset_vendor_telechips, [cpuinfo_arm_chipset_series_texas_instruments_omap] = cpuinfo_arm_chipset_vendor_texas_instruments, + [cpuinfo_arm_chipset_series_unisoc_t] = cpuinfo_arm_chipset_vendor_unisoc, [cpuinfo_arm_chipset_series_wondermedia_wm] = cpuinfo_arm_chipset_vendor_wondermedia, }; @@ -877,6 +866,63 @@ static bool match_sc( return true; } +/** + * Tries to match, case-sensitively, /Unisoc T\d{3,4}/ signature for Unisoc T chipset. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, + * ro.board.platform, or ro.chipname) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, + * ro.board.platform, or ro.chipname) to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_t( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect 11-12 symbols: "Unisoc T" (8 symbols) + 3-4-digit model number */ + const size_t length = end - start; + switch (length) { + case 11: + case 12: + break; + default: + return false; + } + + /* Check that string starts with "Unisoc T". 
The first four characters are loaded as 32-bit little endian word */ + const uint32_t expected_unis = load_u32le(start); + if (expected_unis != UINT32_C(0x73696E55) /* "sinU" = reverse("Unis") */) { + return false; + } + + /* The next four characters are loaded as 32-bit little endian word */ + const uint32_t expected_oc_t = load_u32le(start + 4); + if (expected_oc_t != UINT32_C(0x5420636F) /* "T co" = reverse("oc T") */) { + return false; + } + + /* Validate and parse 3-4 digit model number */ + uint32_t model = 0; + for (uint32_t i = 8; i < length; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unisoc, + .series = cpuinfo_arm_chipset_series_unisoc_t, + .model = model, + }; + return true; +} + /** * Tries to match /lc\d{4}[a-z]?$/ signature for Leadcore LC chipsets. * If match successful, extracts model information into \p chipset argument. 
@@ -891,7 +937,7 @@ static bool match_lc( const char* start, const char* end, struct cpuinfo_arm_chipset chipset[restrict static 1]) { - /* Expect at 6-7 symbols: "lc" (2 symbols) + 4-digit model number + optional 1-letter suffix */ + /* Expect 6-7 symbols: "lc" (2 symbols) + 4-digit model number + optional 1-letter suffix */ const size_t length = end - start; switch (length) { case 6: @@ -1351,7 +1397,7 @@ static bool match_and_parse_sunxi( return false; } - /* Compare sunXi platform id and number of cores to tabluted values to decode chipset name */ + /* Compare sunXi platform id and number of cores to tabulated values to decode chipset name */ uint32_t model = 0; char suffix = 0; for (size_t i = 0; i < CPUINFO_COUNT_OF(sunxi_map_entries); i++) { @@ -2354,6 +2400,16 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha cpuinfo_log_debug( "matched Spreadtrum SC signature in /proc/cpuinfo Hardware string \"%.*s\"", (int) hardware_length, hardware); + + return chipset; + } + + /* Check Unisoc T signature */ + if (match_t(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Unisoc T signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; } @@ -3417,6 +3473,7 @@ static const char* chipset_vendor_string[cpuinfo_arm_chipset_vendor_max] = { [cpuinfo_arm_chipset_vendor_spreadtrum] = "Spreadtrum", [cpuinfo_arm_chipset_vendor_telechips] = "Telechips", [cpuinfo_arm_chipset_vendor_texas_instruments] = "Texas Instruments", + [cpuinfo_arm_chipset_vendor_unisoc] = "Unisoc", [cpuinfo_arm_chipset_vendor_wondermedia] = "WonderMedia", }; @@ -3451,6 +3508,7 @@ static const char* chipset_series_string[cpuinfo_arm_chipset_series_max] = { [cpuinfo_arm_chipset_series_spreadtrum_sc] = "SC", [cpuinfo_arm_chipset_series_telechips_tcc] = "TCC", [cpuinfo_arm_chipset_series_texas_instruments_omap] = "OMAP", + [cpuinfo_arm_chipset_series_unisoc_t] = "T", [cpuinfo_arm_chipset_series_wondermedia_wm] 
= "WM", }; @@ -3484,7 +3542,7 @@ void cpuinfo_arm_chipset_to_string( } } -#ifdef __ANDROID__ +#if defined(__ANDROID__) static inline struct cpuinfo_arm_chipset disambiguate_qualcomm_chipset( const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], const struct cpuinfo_arm_chipset ro_product_board_chipset[restrict static 1], @@ -3776,7 +3834,7 @@ void cpuinfo_arm_chipset_to_string( */ void cpuinfo_arm_fixup_raspberry_pi_chipset( struct cpuinfo_arm_chipset chipset[restrict static 1], - const char revision[restrict static CPUINFO_HARDWARE_VALUE_MAX]) + const char revision[restrict static CPUINFO_REVISION_VALUE_MAX]) { const size_t revision_length = strnlen(revision, CPUINFO_REVISION_VALUE_MAX); diff --git a/3rdparty/cpuinfo/src/arm/linux/clusters.c b/3rdparty/cpuinfo/src/arm/linux/clusters.c index c7a40457d28d3..430773d1d1748 100644 --- a/3rdparty/cpuinfo/src/arm/linux/clusters.c +++ b/3rdparty/cpuinfo/src/arm/linux/clusters.c @@ -48,7 +48,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags. * @param max_processors - number of elements in the @p processors array. * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum - * frequency, MIDR infromation, and core cluster (package siblings list) information. + * frequency, MIDR information, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. * @retval false if known details about processors contradict the heuristic configuration of core clusters. 
@@ -292,9 +292,9 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( * - Processors assigned to these clusters stay assigned to the same clusters * - No new processors are added to these clusters * - Processors without pre-assigned cluster are clustered in one sequential scan: - * - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceeding - * processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceeding processor. - * - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceeding + * - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceding + * processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceding processor. + * - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceding * processor, the processor is assigned to a newly created cluster. * * The function must be called after parsing OS-provided information on core clusters, and usually is called only @@ -309,7 +309,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( * * @param max_processors - number of elements in the @p processors array. * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum - * frequency, MIDR infromation, and core cluster (package siblings list) information. + * frequency, MIDR information, and core cluster (package siblings list) information. * * @retval true if the heuristic successfully assigned all processors into clusters of cores. * @retval false if known details about processors contradict the heuristic configuration of core clusters. 
@@ -331,7 +331,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { if (cluster_min_frequency != processors[i].min_frequency) { cpuinfo_log_info( - "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); " + "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); " "processor %"PRIu32" starts to a new cluster", i, processors[i].min_frequency, cluster_min_frequency, i); goto new_cluster; @@ -346,7 +346,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { if (cluster_max_frequency != processors[i].max_frequency) { cpuinfo_log_debug( - "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); " + "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceding cluster (%"PRIu32" KHz); " "processor %"PRIu32" starts a new cluster", i, processors[i].max_frequency, cluster_max_frequency, i); goto new_cluster; @@ -361,7 +361,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) { cpuinfo_log_debug( - "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceeding cluster (0x%02"PRIx32"); " + "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceding cluster (0x%02"PRIx32"); " "processor %"PRIu32" starts to a new cluster", i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr), i); goto new_cluster; @@ -417,11 +417,11 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( } } - /* All checks passed, attach processor to the preceeding cluster */ + /* All checks 
passed, attach processor to the preceding cluster */ cluster_processors++; processors[i].package_leader_id = cluster_start; processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; - cpuinfo_log_debug("assigned processor %"PRIu32" to preceeding cluster of processor %"PRIu32, i, cluster_start); + cpuinfo_log_debug("assigned processor %"PRIu32" to preceding cluster of processor %"PRIu32, i, cluster_start); continue; new_cluster: diff --git a/3rdparty/cpuinfo/src/arm/linux/cp.h b/3rdparty/cpuinfo/src/arm/linux/cp.h index 63940ec5d85f1..0abd7d61e8e9a 100644 --- a/3rdparty/cpuinfo/src/arm/linux/cp.h +++ b/3rdparty/cpuinfo/src/arm/linux/cp.h @@ -35,10 +35,16 @@ return mvfr0; } #endif - - static inline uint32_t read_wcid(void) { - uint32_t wcid; - __asm__ __volatile__("MRC p1, 0, %[wcid], c0, c0" : [wcid] "=r" (wcid)); - return wcid; - } + #if !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) + /* + * In ARMv8, AArch32 state supports only conceptual coprocessors CP10, CP11, CP14, and CP15. + * AArch64 does not support the concept of coprocessors. 
+ * and clang refuses to compile inline assembly when targeting ARMv8+ + */ + static inline uint32_t read_wcid(void) { + uint32_t wcid; + __asm__ __volatile__("MRC p1, 0, %[wcid], c0, c0" : [wcid] "=r" (wcid)); + return wcid; + } + #endif #endif diff --git a/3rdparty/cpuinfo/src/arm/linux/cpuinfo.c b/3rdparty/cpuinfo/src/arm/linux/cpuinfo.c index 90e1631ee1c00..b7805b5ef418a 100644 --- a/3rdparty/cpuinfo/src/arm/linux/cpuinfo.c +++ b/3rdparty/cpuinfo/src/arm/linux/cpuinfo.c @@ -177,6 +177,10 @@ static void parse_features( #if CPUINFO_ARCH_ARM64 processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA; #endif + } else if (memcmp(feature_start, "i8mm", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_I8MM; + #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "half", feature_length) == 0) { processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF; @@ -283,6 +287,10 @@ static void parse_features( #if CPUINFO_ARCH_ARM64 processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM; #endif + } else if (memcmp(feature_start, "asimdfhm", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM; + #endif #if CPUINFO_ARCH_ARM } else if (memcmp(feature_start, "fastmult", feature_length) == 0) { processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT; @@ -896,6 +904,7 @@ bool cpuinfo_arm_linux_parse_proc_cpuinfo( uint32_t max_processors_count, struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]) { + hardware[0] = '\0'; struct proc_cpuinfo_parser_state state = { .hardware = hardware, .revision = revision, diff --git a/3rdparty/cpuinfo/src/arm/linux/hwcap.c b/3rdparty/cpuinfo/src/arm/linux/hwcap.c index 35e9994f6939e..984ab43c52197 100644 --- a/3rdparty/cpuinfo/src/arm/linux/hwcap.c +++ b/3rdparty/cpuinfo/src/arm/linux/hwcap.c @@ -1,3 +1,4 @@ +#include #include #include @@ -15,7 +16,8 @@ #include #include -#if CPUINFO_ARCH_ARM64 || 
CPUINFO_ARCH_ARM && !defined(__ANDROID__) +#if CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_ARM && \ + defined(__GLIBC__) && defined(__GLIBC_MINOR__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 16) #include #else #define AT_HWCAP 16 @@ -74,11 +76,13 @@ libc = NULL; } return getauxval != NULL; - #else - /* GNU/Linux: getauxval is always supported */ + #elif defined(__GLIBC__) && defined(__GLIBC_MINOR__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 16) + /* GNU/Linux: getauxval is supported since glibc-2.16 */ *hwcap = getauxval(AT_HWCAP); *hwcap2 = getauxval(AT_HWCAP2); return true; + #else + return false; #endif } diff --git a/3rdparty/cpuinfo/src/arm/linux/init.c b/3rdparty/cpuinfo/src/arm/linux/init.c index 23d84399678f7..2501f39c8dddb 100644 --- a/3rdparty/cpuinfo/src/arm/linux/init.c +++ b/3rdparty/cpuinfo/src/arm/linux/init.c @@ -199,9 +199,13 @@ void cpuinfo_arm_linux_init(void) { for (uint32_t i = 0; i < arm_linux_processors_count; i++) { arm_linux_processors[i].system_processor_id = i; if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - valid_processors += 1; - - if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) { + if (arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR) { + /* + * Processor is in possible and present lists, and also reported in /proc/cpuinfo. + * This processor is available for compute. + */ + valid_processors += 1; + } else { /* * Processor is in possible and present lists, but not reported in /proc/cpuinfo. * This is fairly common: high-index processors can be not reported if they are offline. 
@@ -510,7 +514,7 @@ void cpuinfo_arm_linux_init(void) { uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; /* Indication whether L3 (if it exists) is shared between all cores */ bool shared_l3 = true; - /* Populate cache infromation structures in l1i, l1d */ + /* Populate cache information structures in l1i, l1d */ for (uint32_t i = 0; i < valid_processors; i++) { if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { cluster_id += 1; diff --git a/3rdparty/cpuinfo/src/arm/linux/midr.c b/3rdparty/cpuinfo/src/arm/linux/midr.c index 2c3116b624fb2..0d8f03fa82f39 100644 --- a/3rdparty/cpuinfo/src/arm/linux/midr.c +++ b/3rdparty/cpuinfo/src/arm/linux/midr.c @@ -675,10 +675,10 @@ static bool cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic( /* * Initializes MIDR for leaders of core clusters in a single sequential scan: - * - Clusters preceeding the first reported MIDR value are assumed to have default MIDR value. + * - Clusters preceding the first reported MIDR value are assumed to have default MIDR value. * - Clusters following any reported MIDR value to have that MIDR value. * - * @param default_midr - MIDR value that will be assigned to cluster leaders preceeding any reported MIDR value. + * @param default_midr - MIDR value that will be assigned to cluster leaders preceding any reported MIDR value. * @param processors_count - number of logical processor descriptions in the @p processors array. * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency, * and decoded core cluster (package_leader_id) information. @@ -833,7 +833,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr( * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration, * and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known. * 3. 
Initialize MIDRs for core clusters in a single sequential scan: - * - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value. + * - Clusters preceding the first reported MIDR value are assumed to have the last reported MIDR value. * - Clusters following any reported MIDR value to have that MIDR value. */ diff --git a/3rdparty/cpuinfo/src/arm/mach/init.c b/3rdparty/cpuinfo/src/arm/mach/init.c index dbea578c43428..6a28b2db2f8c0 100644 --- a/3rdparty/cpuinfo/src/arm/mach/init.c +++ b/3rdparty/cpuinfo/src/arm/mach/init.c @@ -15,43 +15,25 @@ #include /* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */ -#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL - #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6 -#endif #ifndef CPUFAMILY_ARM_VORTEX_TEMPEST - #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F + #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F #endif #ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER - #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 + #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 #endif #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3 #endif +#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD + #define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D +#endif struct cpuinfo_arm_isa cpuinfo_isa = { -#if CPUINFO_ARCH_ARM - .thumb = true, - .thumb2 = true, - .thumbee = false, - .jazelle = false, - .armv5e = true, - .armv6 = true, - .armv6k = true, - .armv7 = true, - .vfpv2 = false, - .vfpv3 = true, - .d32 = true, - .wmmx = false, - .wmmx2 = false, - .neon = true, -#endif -#if CPUINFO_ARCH_ARM64 .aes = true, .sha1 = true, .sha2 = true, .pmull = true, .crc32 = true, -#endif }; static uint32_t get_sys_info(int type_specifier, const char* name) { @@ -83,10 +65,8 @@ static uint32_t get_sys_info_by_name(const char* type_specifier) { return result; } -static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) { +static enum 
cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, uint32_t core_count) { switch (cpu_family) { - case CPUFAMILY_ARM_SWIFT: - return cpuinfo_uarch_swift; case CPUFAMILY_ARM_CYCLONE: return cpuinfo_uarch_cyclone; case CPUFAMILY_ARM_TYPHOON: @@ -107,25 +87,15 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype case CPUFAMILY_ARM_FIRESTORM_ICESTORM: /* Hexa-core: 2x Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */ return core_index + 4 < core_count ? cpuinfo_uarch_firestorm : cpuinfo_uarch_icestorm; + case CPUFAMILY_ARM_AVALANCHE_BLIZZARD: + /* Hexa-core: 2x Avalanche + 4x Blizzard */ + return core_index + 4 < core_count ? cpuinfo_uarch_avalanche : cpuinfo_uarch_blizzard; default: /* Use hw.cpusubtype for detection */ break; } - #if CPUINFO_ARCH_ARM - switch (cpu_subtype) { - case CPU_SUBTYPE_ARM_V7: - return cpuinfo_uarch_cortex_a8; - case CPU_SUBTYPE_ARM_V7F: - return cpuinfo_uarch_cortex_a9; - case CPU_SUBTYPE_ARM_V7K: - return cpuinfo_uarch_cortex_a7; - default: - return cpuinfo_uarch_unknown; - } - #else - return cpuinfo_uarch_unknown; - #endif + return cpuinfo_uarch_unknown; } static void decode_package_name(char* package_name) { @@ -299,71 +269,118 @@ void cpuinfo_arm_mach_init(void) { const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); - const uint32_t cpu_type = get_sys_info_by_name("hw.cputype"); - const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype"); - switch (cpu_type) { - case CPU_TYPE_ARM64: - cpuinfo_isa.aes = true; - cpuinfo_isa.sha1 = true; - cpuinfo_isa.sha2 = true; - cpuinfo_isa.pmull = true; - cpuinfo_isa.crc32 = true; - break; -#if CPUINFO_ARCH_ARM - case CPU_TYPE_ARM: - switch (cpu_subtype) { - case CPU_SUBTYPE_ARM_V8: - cpuinfo_isa.armv8 = true; - cpuinfo_isa.aes = true; - cpuinfo_isa.sha1 = true; - cpuinfo_isa.sha2 = true; - cpuinfo_isa.pmull = true; - cpuinfo_isa.crc32 = true; - /* Fall-through to add ARMv7S features */ - case CPU_SUBTYPE_ARM_V7S: - 
case CPU_SUBTYPE_ARM_V7K: - cpuinfo_isa.fma = true; - /* Fall-through to add ARMv7F features */ - case CPU_SUBTYPE_ARM_V7F: - cpuinfo_isa.armv7mp = true; - cpuinfo_isa.fp16 = true; - /* Fall-through to add ARMv7 features */ - case CPU_SUBTYPE_ARM_V7: - break; - default: - break; - } - break; -#endif - } + /* - * Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via - * sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments - * (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf), - * but on new iOS versions these calls just fail with EPERM. - * - * Thus, we whitelist CPUs known to support these instructions. + * iOS 15 and macOS 12 added sysctls for ARM features, use them where possible. + * Otherwise, fallback to hardcoded set of CPUs with known support. */ - switch (cpu_family) { - case CPUFAMILY_ARM_MONSOON_MISTRAL: - case CPUFAMILY_ARM_VORTEX_TEMPEST: - case CPUFAMILY_ARM_LIGHTNING_THUNDER: - case CPUFAMILY_ARM_FIRESTORM_ICESTORM: - #if CPUINFO_ARCH_ARM64 + const uint32_t has_feat_lse = get_sys_info_by_name("hw.optional.arm.FEAT_LSE"); + if (has_feat_lse != 0) { + cpuinfo_isa.atomics = true; + } else { + // Mandatory in ARMv8.1-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_MONSOON_MISTRAL: + case CPUFAMILY_ARM_VORTEX_TEMPEST: + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: cpuinfo_isa.atomics = true; - #endif - cpuinfo_isa.fp16arith = true; + } } - /* - * There does not yet seem to exist an OS mechanism to detect support for - * ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs - * known to support these instruction. 
- */ - switch (cpu_family) { - case CPUFAMILY_ARM_LIGHTNING_THUNDER: - case CPUFAMILY_ARM_FIRESTORM_ICESTORM: - cpuinfo_isa.dot = true; + const uint32_t has_feat_rdm = get_sys_info_by_name("hw.optional.arm.FEAT_RDM"); + if (has_feat_rdm != 0) { + cpuinfo_isa.rdm = true; + } else { + // Optional in ARMv8.2-A (implemented in Apple cores), + // list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_MONSOON_MISTRAL: + case CPUFAMILY_ARM_VORTEX_TEMPEST: + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.rdm = true; + } + } + + const uint32_t has_feat_fp16 = get_sys_info_by_name("hw.optional.arm.FEAT_FP16"); + if (has_feat_fp16 != 0) { + cpuinfo_isa.fp16arith = true; + } else { + // Optional in ARMv8.2-A (implemented in Apple cores), + // list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_MONSOON_MISTRAL: + case CPUFAMILY_ARM_VORTEX_TEMPEST: + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.fp16arith = true; + } + } + + const uint32_t has_feat_fhm = get_sys_info_by_name("hw.optional.arm.FEAT_FHM"); + if (has_feat_fhm != 0) { + cpuinfo_isa.fhm = true; + } else { + // Prior to iOS 15, use 'hw.optional.armv8_2_fhm' + const uint32_t has_feat_fhm_legacy = get_sys_info_by_name("hw.optional.armv8_2_fhm"); + if (has_feat_fhm_legacy != 0) { + cpuinfo_isa.fhm = true; + } else { + // Mandatory in ARMv8.4-A when FP16 arithmetics is implemented, + // list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.fhm = true; + } + } + } + + const uint32_t has_feat_bf16 = get_sys_info_by_name("hw.optional.arm.FEAT_BF16"); + if (has_feat_bf16 != 0) { + cpuinfo_isa.bf16 = true; + } + + const uint32_t has_feat_fcma = get_sys_info_by_name("hw.optional.arm.FEAT_FCMA"); + if (has_feat_fcma != 0) { + cpuinfo_isa.fcma = 
true; + } else { + // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.fcma = true; + } + } + + const uint32_t has_feat_jscvt = get_sys_info_by_name("hw.optional.arm.FEAT_JSCVT"); + if (has_feat_jscvt != 0) { + cpuinfo_isa.jscvt = true; + } else { + // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.jscvt = true; + } + } + + const uint32_t has_feat_dotprod = get_sys_info_by_name("hw.optional.arm.FEAT_DotProd"); + if (has_feat_dotprod != 0) { + cpuinfo_isa.dot = true; + } else { + // Mandatory in ARMv8.4-A, list only cores released before iOS 15 / macOS 12 + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.dot = true; + } + } + + const uint32_t has_feat_i8mm = get_sys_info_by_name("hw.optional.arm.FEAT_I8MM"); + if (has_feat_i8mm != 0) { + cpuinfo_isa.i8mm = true; } uint32_t num_clusters = 1; @@ -374,7 +391,7 @@ void cpuinfo_arm_mach_init(void) { .core_id = i % cores_per_package, .package = packages + i / cores_per_package, .vendor = cpuinfo_vendor_apple, - .uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores), + .uarch = decode_uarch(cpu_family, i, mach_topology.cores), }; if (i != 0 && cores[i].uarch != cores[i - 1].uarch) { num_clusters++; diff --git a/3rdparty/cpuinfo/src/arm/midr.h b/3rdparty/cpuinfo/src/arm/midr.h index 739dc1906e6b7..7255cfcf9f900 100644 --- a/3rdparty/cpuinfo/src/arm/midr.h +++ b/3rdparty/cpuinfo/src/arm/midr.h @@ -174,23 +174,27 @@ inline static uint32_t midr_score_core(uint32_t midr) { case UINT32_C(0x53000030): /* Exynos M4 */ case UINT32_C(0x53000040): /* Exynos M5 */ case UINT32_C(0x4100D440): /* Cortex-X1 */ - /* These cores are in big role w.r.t 
Cortex-A75/-A76/-A77/-A78 */ + case UINT32_C(0x4100D480): /* Cortex-X2 */ + case UINT32_C(0x4100D4E0): /* Cortex-X3 */ + /* These cores are in big role w.r.t Cortex-A75/-A76/-A77/-A78/-A710/-A715 */ return 6; + case UINT32_C(0x4100D080): /* Cortex-A72 */ + case UINT32_C(0x4100D090): /* Cortex-A73 */ + case UINT32_C(0x4100D0A0): /* Cortex-A75 */ + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D410): /* Cortex-A78 */ + case UINT32_C(0x4100D470): /* Cortex-A710 */ + case UINT32_C(0x4100D4D0): /* Cortex-A715 */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x4E000030): /* Denver 2 */ + case UINT32_C(0x51002050): /* Kryo Gold */ + case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */ + case UINT32_C(0x51008020): /* Kryo 385 Gold */ + case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */ case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */ case UINT32_C(0x53000020): /* Exynos M3 */ - case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */ - case UINT32_C(0x51008020): /* Kryo 385 Gold */ - case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */ - case UINT32_C(0x51002050): /* Kryo Gold */ - case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ - case UINT32_C(0x4100D410): /* Cortex-A78 */ - case UINT32_C(0x4100D0D0): /* Cortex-A77 */ - case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ - case UINT32_C(0x4100D0B0): /* Cortex-A76 */ - case UINT32_C(0x4100D0A0): /* Cortex-A75 */ - case UINT32_C(0x4100D090): /* Cortex-A73 */ - case UINT32_C(0x4100D080): /* Cortex-A72 */ #if CPUINFO_ARCH_ARM case UINT32_C(0x4100C0F0): /* Cortex-A15 */ case UINT32_C(0x4100C0E0): /* Cortex-A17 */ @@ -205,8 +209,9 @@ inline static uint32_t midr_score_core(uint32_t midr) { #if CPUINFO_ARCH_ARM64 case UINT32_C(0x4100D060): /* Cortex-A65 */ #endif /* CPUINFO_ARCH_ARM64 */ - case UINT32_C(0x4100D050): /* Cortex-A55 */ case UINT32_C(0x4100D030): /* 
Cortex-A53 */ + case UINT32_C(0x4100D050): /* Cortex-A55 */ + case UINT32_C(0x4100D460): /* Cortex-A510 */ /* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */ return 2; case UINT32_C(0x4100D040): /* Cortex-A35 */ diff --git a/3rdparty/cpuinfo/src/arm/uarch.c b/3rdparty/cpuinfo/src/arm/uarch.c index 8b5362b9848dd..f1dd49349a0ea 100644 --- a/3rdparty/cpuinfo/src/arm/uarch.c +++ b/3rdparty/cpuinfo/src/arm/uarch.c @@ -80,28 +80,50 @@ void cpuinfo_arm_decode_vendor_uarch( case 0xD0B: *uarch = cpuinfo_uarch_cortex_a76; break; -#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) case 0xD0C: *uarch = cpuinfo_uarch_neoverse_n1; break; -#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ case 0xD0D: *uarch = cpuinfo_uarch_cortex_a77; break; case 0xD0E: /* Cortex-A76AE */ *uarch = cpuinfo_uarch_cortex_a76; break; + case 0xD40: /* Neoverse V1 */ + *uarch = cpuinfo_uarch_neoverse_v1; + break; case 0xD41: /* Cortex-A78 */ *uarch = cpuinfo_uarch_cortex_a78; break; case 0xD44: /* Cortex-X1 */ *uarch = cpuinfo_uarch_cortex_x1; break; -#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD46: /* Cortex-A510 */ + *uarch = cpuinfo_uarch_cortex_a510; + break; + case 0xD47: /* Cortex-A710 */ + *uarch = cpuinfo_uarch_cortex_a710; + break; + case 0xD48: /* Cortex-X2 */ + *uarch = cpuinfo_uarch_cortex_x2; + break; + case 0xD49: /* Neoverse N2 */ + *uarch = cpuinfo_uarch_neoverse_n2; + break; +#if CPUINFO_ARCH_ARM64 case 0xD4A: *uarch = cpuinfo_uarch_neoverse_e1; break; -#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ +#endif /* CPUINFO_ARCH_ARM64 */ + case 0xD4D: /* Cortex-A715 */ + *uarch = cpuinfo_uarch_cortex_a715; + break; + case 0xD4E: /* Cortex-X3 */ + *uarch = cpuinfo_uarch_cortex_x3; + break; + case 0xD4F: /* Neoverse V2 */ + *uarch = cpuinfo_uarch_neoverse_v2; + break; default: switch (midr_get_part(midr) >> 8) { #if CPUINFO_ARCH_ARM @@ -129,18 +151,18 @@ void cpuinfo_arm_decode_vendor_uarch( case 0x100: *uarch = 
cpuinfo_uarch_brahma_b53; break; -#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) +#if CPUINFO_ARCH_ARM64 case 0x516: /* Broadcom Vulkan was sold to Cavium before it reached the market, so we identify it as Cavium ThunderX2 */ *vendor = cpuinfo_vendor_cavium; *uarch = cpuinfo_uarch_thunderx2; break; -#endif +#endif /* CPUINFO_ARCH_ARM64 */ default: cpuinfo_log_warning("unknown Broadcom CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); } break; -#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) +#if CPUINFO_ARCH_ARM64 case 'C': *vendor = cpuinfo_vendor_cavium; switch (midr_get_part(midr)) { @@ -157,15 +179,15 @@ void cpuinfo_arm_decode_vendor_uarch( cpuinfo_log_warning("unknown Cavium CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); } break; -#endif +#endif /* CPUINFO_ARCH_ARM64 */ case 'H': *vendor = cpuinfo_vendor_huawei; switch (midr_get_part(midr)) { -#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) +#if CPUINFO_ARCH_ARM64 case 0xD01: /* Kunpeng 920 series */ *uarch = cpuinfo_uarch_taishan_v110; break; -#endif +#endif /* CPUINFO_ARCH_ARM64 */ case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */ *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a76; @@ -204,7 +226,6 @@ void cpuinfo_arm_decode_vendor_uarch( cpuinfo_log_warning("unknown Nvidia CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); } break; -#if !defined(__ANDROID__) case 'P': *vendor = cpuinfo_vendor_apm; switch (midr_get_part(midr)) { @@ -215,7 +236,6 @@ void cpuinfo_arm_decode_vendor_uarch( cpuinfo_log_warning("unknown Applied Micro CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); } break; -#endif case 'Q': *vendor = cpuinfo_vendor_qualcomm; switch (midr_get_part(midr)) { @@ -282,14 +302,14 @@ void cpuinfo_arm_decode_vendor_uarch( *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a55; break; -#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) +#if CPUINFO_ARCH_ARM64 case 0xC00: *uarch = cpuinfo_uarch_falkor; break; case 0xC01: *uarch = cpuinfo_uarch_saphira; 
break; -#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ +#endif /* CPUINFO_ARCH_ARM64 */ default: cpuinfo_log_warning("unknown Qualcomm CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); } diff --git a/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c b/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c new file mode 100644 index 0000000000000..b7b3990c15cba --- /dev/null +++ b/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c @@ -0,0 +1,901 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "windows-arm-init.h" + +#define MAX_NR_OF_CACHES (cpuinfo_cache_level_max - 1) + +/* Call chain: + * cpu_info_init_by_logical_sys_info + * read_packages_for_processors + * read_cores_for_processors + * read_caches_for_processors + * read_all_logical_processor_info_of_relation + * parse_relation_processor_info + * store_package_info_per_processor + * store_core_info_per_processor + * parse_relation_cache_info + * store_cache_info_per_processor + */ + +static uint32_t count_logical_processors( + const uint32_t max_group_count, + uint32_t* global_proc_index_per_group); + +static uint32_t read_packages_for_processors( + struct cpuinfo_processor* processors, + const uint32_t number_of_processors, + const uint32_t* global_proc_index_per_group, + const struct woa_chip_info *chip_info); + +static uint32_t read_cores_for_processors( + struct cpuinfo_processor* processors, + const uint32_t number_of_processors, + const uint32_t* global_proc_index_per_group, + struct cpuinfo_core* cores, + const struct woa_chip_info *chip_info); + +static uint32_t read_caches_for_processors( + struct cpuinfo_processor *processors, + const uint32_t number_of_processors, + struct cpuinfo_cache *caches, + uint32_t* numbers_of_caches, + const uint32_t* global_proc_index_per_group, + const struct woa_chip_info *chip_info); + +static uint32_t read_all_logical_processor_info_of_relation( + 
LOGICAL_PROCESSOR_RELATIONSHIP info_type, + struct cpuinfo_processor* processors, + const uint32_t number_of_processors, + struct cpuinfo_cache* caches, + uint32_t* numbers_of_caches, + struct cpuinfo_core* cores, + const uint32_t* global_proc_index_per_group, + const struct woa_chip_info *chip_info); + +static bool parse_relation_processor_info( + struct cpuinfo_processor* processors, + uint32_t nr_of_processors, + const uint32_t* global_proc_index_per_group, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info, + const uint32_t info_id, + struct cpuinfo_core* cores, + const struct woa_chip_info *chip_info); + +static bool parse_relation_cache_info( + struct cpuinfo_processor* processors, + struct cpuinfo_cache* caches, + uint32_t* numbers_of_caches, + const uint32_t* global_proc_index_per_group, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info); + +static void store_package_info_per_processor( + struct cpuinfo_processor* processors, + const uint32_t processor_global_index, + const uint32_t package_id, + const uint32_t group_id, + const uint32_t processor_id_in_group); + +static void store_core_info_per_processor( + struct cpuinfo_processor* processors, + const uint32_t processor_global_index, + const uint32_t core_id, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info, + struct cpuinfo_core* cores, + const struct woa_chip_info *chip_info); + +static void store_cache_info_per_processor( + struct cpuinfo_processor* processors, + const uint32_t processor_global_index, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info, + struct cpuinfo_cache* current_cache); + +static bool connect_packages_cores_clusters_by_processors( + struct cpuinfo_processor* processors, + const uint32_t nr_of_processors, + struct cpuinfo_package* packages, + const uint32_t nr_of_packages, + struct cpuinfo_cluster* clusters, + struct cpuinfo_core* cores, + const uint32_t nr_of_cores, + const struct woa_chip_info* chip_info, + enum cpuinfo_vendor vendor); + +static inline uint32_t 
low_index_from_kaffinity(KAFFINITY kaffinity); + + +bool cpu_info_init_by_logical_sys_info( + const struct woa_chip_info *chip_info, + const enum cpuinfo_vendor vendor) +{ + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_package* packages = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cache* caches = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; + + uint32_t nr_of_packages = 0; + uint32_t nr_of_cores = 0; + uint32_t nr_of_all_caches = 0; + uint32_t numbers_of_caches[MAX_NR_OF_CACHES] = {0}; + + uint32_t nr_of_uarchs = 0; + bool result = false; + + HANDLE heap = GetProcessHeap(); + + /* 1. Count available logical processor groups and processors */ + const uint32_t max_group_count = (uint32_t) GetMaximumProcessorGroupCount(); + cpuinfo_log_debug("detected %"PRIu32" processor group(s)", max_group_count); + /* We need to store the absolute processor ID offsets for every group, because + * 1. We can't assume every processor group includes the same number of + * logical processors. + * 2. Every processor group knows its group number and processor IDs within + * the group, but not the global processor IDs. + * 3. We need to list every logical processor by global ID. 
+ */ + uint32_t* global_proc_index_per_group = + (uint32_t*) HeapAlloc(heap, 0, max_group_count * sizeof(uint32_t)); + if (global_proc_index_per_group == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" processor groups", + max_group_count * sizeof(struct cpuinfo_processor), max_group_count); + goto clean_up; + } + + uint32_t nr_of_processors = + count_logical_processors(max_group_count, global_proc_index_per_group); + processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_processors * sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + nr_of_processors * sizeof(struct cpuinfo_processor), nr_of_processors); + goto clean_up; + } + + /* 2. Read topology information via MSDN API: packages, cores and caches*/ + nr_of_packages = read_packages_for_processors( + processors, nr_of_processors, + global_proc_index_per_group, + chip_info); + if (!nr_of_packages) { + cpuinfo_log_error("error in reading package information"); + goto clean_up; + } + cpuinfo_log_debug("detected %"PRIu32" processor package(s)", nr_of_packages); + + /* We need the EfficiencyClass to parse uarch from the core information, + * but we need to iterate first to count cores and allocate memory then + * we will iterate again to read and store data to cpuinfo_core structures. + */ + nr_of_cores = read_cores_for_processors( + processors, nr_of_processors, + global_proc_index_per_group, NULL, + chip_info); + if (!nr_of_cores) { + cpuinfo_log_error("error in reading core information"); + goto clean_up; + } + cpuinfo_log_debug("detected %"PRIu32" processor core(s)", nr_of_cores); + + /* There is no API to read number of caches, so we need to iterate twice on caches: + 1. Count all type of caches -> allocate memory + 2. 
Read out cache data and store to allocated memory + */ + nr_of_all_caches = read_caches_for_processors( + processors, nr_of_processors, + caches, numbers_of_caches, + global_proc_index_per_group, chip_info); + if (!nr_of_all_caches) { + cpuinfo_log_error("error in reading cache information"); + goto clean_up; + } + cpuinfo_log_debug("detected %"PRIu32" processor cache(s)", nr_of_all_caches); + + /* 3. Allocate memory for package, cluster, core and cache structures */ + packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_packages * sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages", + nr_of_packages * sizeof(struct cpuinfo_package), nr_of_packages); + goto clean_up; + } + + /* We don't have cluster information so we explicitly set clusters to equal to cores. */ + clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters", + nr_of_cores * sizeof(struct cpuinfo_cluster), nr_of_cores); + goto clean_up; + } + + cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + nr_of_cores * sizeof(struct cpuinfo_core), nr_of_cores); + goto clean_up; + } + + /* We allocate one contiguous cache array for all caches, then use offsets per cache type. */ + caches = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_all_caches * sizeof(struct cpuinfo_cache)); + if (caches == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" caches", + nr_of_all_caches * sizeof(struct cpuinfo_cache), nr_of_all_caches); + goto clean_up; + } + + /* 4.Read missing topology information that can't be saved without counted + * allocate structures in the first round. 
+ */ + nr_of_all_caches = read_caches_for_processors( + processors, nr_of_processors, + caches, numbers_of_caches, global_proc_index_per_group, chip_info); + if (!nr_of_all_caches) { + cpuinfo_log_error("error in reading cache information"); + goto clean_up; + } + + nr_of_cores = read_cores_for_processors( + processors, nr_of_processors, + global_proc_index_per_group, cores, + chip_info); + if (!nr_of_cores) { + cpuinfo_log_error("error in reading core information"); + goto clean_up; + } + + /* 5. Now that we read out everything from the system we can, fill the package, cluster + * and core structures respectively. + */ + result = connect_packages_cores_clusters_by_processors( + processors, nr_of_processors, + packages, nr_of_packages, + clusters, + cores, nr_of_cores, + chip_info, + vendor); + if(!result) { + cpuinfo_log_error("error in connecting information"); + goto clean_up; + } + + /* 6. Count and store uarchs of cores, assuming same uarchs are neighbors */ + enum cpuinfo_uarch prev_uarch = cpuinfo_uarch_unknown; + for (uint32_t i = 0; i < nr_of_cores; i++) { + if (prev_uarch != cores[i].uarch) { + nr_of_uarchs++; + prev_uarch = cores[i].uarch; + } + } + uarchs = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_uarchs * sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs", + nr_of_uarchs * sizeof(struct cpuinfo_uarch_info), nr_of_uarchs); + goto clean_up; + } + prev_uarch = cpuinfo_uarch_unknown; + for (uint32_t i = 0, uarch_index = 0; i < nr_of_cores; i++) { + if (prev_uarch != cores[i].uarch) { + if (i != 0) { + uarch_index++; + } + if (uarch_index >= nr_of_uarchs) { + cpuinfo_log_error("more uarchs detected than reported"); + } + prev_uarch = cores[i].uarch; + uarchs[uarch_index].uarch = cores[i].uarch; + uarchs[uarch_index].core_count = 1; + uarchs[uarch_index].processor_count = cores[i].processor_count; + } else if (prev_uarch != cpuinfo_uarch_unknown) { + 
uarchs[uarch_index].core_count++; + uarchs[uarch_index].processor_count += cores[i].processor_count; + } + } + + /* 7. Commit changes */ + cpuinfo_processors = processors; + cpuinfo_packages = packages; + cpuinfo_clusters = clusters; + cpuinfo_cores = cores; + cpuinfo_uarchs = uarchs; + + cpuinfo_processors_count = nr_of_processors; + cpuinfo_packages_count = nr_of_packages; + cpuinfo_clusters_count = nr_of_cores; + cpuinfo_cores_count = nr_of_cores; + cpuinfo_uarchs_count = nr_of_uarchs; + + for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) { + cpuinfo_cache_count[i] = numbers_of_caches[i]; + } + cpuinfo_cache[cpuinfo_cache_level_1i] = caches; + cpuinfo_cache[cpuinfo_cache_level_1d] = cpuinfo_cache[cpuinfo_cache_level_1i] + cpuinfo_cache_count[cpuinfo_cache_level_1i]; + cpuinfo_cache[cpuinfo_cache_level_2] = cpuinfo_cache[cpuinfo_cache_level_1d] + cpuinfo_cache_count[cpuinfo_cache_level_1d]; + cpuinfo_cache[cpuinfo_cache_level_3] = cpuinfo_cache[cpuinfo_cache_level_2] + cpuinfo_cache_count[cpuinfo_cache_level_2]; + cpuinfo_cache[cpuinfo_cache_level_4] = cpuinfo_cache[cpuinfo_cache_level_3] + cpuinfo_cache_count[cpuinfo_cache_level_3]; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + + result = true; + MemoryBarrier(); + + processors = NULL; + packages = NULL; + clusters = NULL; + cores = NULL; + caches = NULL; + uarchs = NULL; + +clean_up: + /* The propagated pointers, shouldn't be freed, only in case of error + * and unfinished init. 
+ */ + if (processors != NULL) { + HeapFree(heap, 0, processors); + } + if (packages != NULL) { + HeapFree(heap, 0, packages); + } + if (clusters != NULL) { + HeapFree(heap, 0, clusters); + } + if (cores != NULL) { + HeapFree(heap, 0, cores); + } + if (caches != NULL) { + HeapFree(heap, 0, caches); + } + if (uarchs != NULL) { + HeapFree(heap, 0, uarchs); + } + + /* Free the locally used temporary pointers */ + HeapFree(heap, 0, global_proc_index_per_group); + global_proc_index_per_group = NULL; + return result; +} + +static uint32_t count_logical_processors( + const uint32_t max_group_count, + uint32_t* global_proc_index_per_group) +{ + uint32_t nr_of_processors = 0; + + for (uint32_t i = 0; i < max_group_count; i++) { + uint32_t nr_of_processors_per_group = GetMaximumProcessorCount((WORD) i); + cpuinfo_log_debug("detected %"PRIu32" processor(s) in group %"PRIu32"", + nr_of_processors_per_group, i); + global_proc_index_per_group[i] = nr_of_processors; + nr_of_processors += nr_of_processors_per_group; + } + return nr_of_processors; +} + +static uint32_t read_packages_for_processors( + struct cpuinfo_processor* processors, + const uint32_t number_of_processors, + const uint32_t* global_proc_index_per_group, + const struct woa_chip_info *chip_info) +{ + return read_all_logical_processor_info_of_relation( + RelationProcessorPackage, + processors, + number_of_processors, + NULL, + NULL, + NULL, + global_proc_index_per_group, + chip_info); +} + +uint32_t read_cores_for_processors( + struct cpuinfo_processor* processors, + const uint32_t number_of_processors, + const uint32_t* global_proc_index_per_group, + struct cpuinfo_core* cores, + const struct woa_chip_info *chip_info) +{ + return read_all_logical_processor_info_of_relation( + RelationProcessorCore, + processors, + number_of_processors, + NULL, + NULL, + cores, + global_proc_index_per_group, + chip_info); +} + +static uint32_t read_caches_for_processors( + struct cpuinfo_processor* processors, + const uint32_t 
number_of_processors, + struct cpuinfo_cache* caches, + uint32_t* numbers_of_caches, + const uint32_t* global_proc_index_per_group, + const struct woa_chip_info *chip_info) +{ + /* Reset processor start indexes */ + if (caches) { + uint32_t cache_offset = 0; + for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) { + for (uint32_t j = 0; j < numbers_of_caches[i]; j++) { + caches[cache_offset + j].processor_start = UINT32_MAX; + } + cache_offset += numbers_of_caches[i]; + } + } + + return read_all_logical_processor_info_of_relation( + RelationCache, + processors, + number_of_processors, + caches, + numbers_of_caches, + NULL, + global_proc_index_per_group, + chip_info); +} + +static uint32_t read_all_logical_processor_info_of_relation( + LOGICAL_PROCESSOR_RELATIONSHIP info_type, + struct cpuinfo_processor* processors, + const uint32_t number_of_processors, + struct cpuinfo_cache* caches, + uint32_t* numbers_of_caches, + struct cpuinfo_core* cores, + const uint32_t* global_proc_index_per_group, + const struct woa_chip_info* chip_info) +{ + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX infos = NULL; + uint32_t nr_of_structs = 0; + DWORD info_size = 0; + bool result = false; + HANDLE heap = GetProcessHeap(); + + /* 1. Query the size of the information structure first */ + if (GetLogicalProcessorInformationEx(info_type, NULL, &info_size) == FALSE) { + const DWORD last_error = GetLastError(); + if (last_error != ERROR_INSUFFICIENT_BUFFER) { + cpuinfo_log_error( + "failed to query size of processor %"PRIu32" information information: error %"PRIu32"", + (uint32_t)info_type, (uint32_t) last_error); + goto clean_up; + } + } + /* 2. Allocate memory for the information structure */ + infos = HeapAlloc(heap, 0, info_size); + if (infos == NULL) { + cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information", + (uint32_t) info_size); + goto clean_up; + } + /* 3. 
Read the information structure */ + if (GetLogicalProcessorInformationEx(info_type, infos, &info_size) == FALSE) { + cpuinfo_log_error("failed to query processor %"PRIu32" information: error %"PRIu32"", + (uint32_t)info_type, (uint32_t) GetLastError()); + goto clean_up; + } + + /* 4. Parse the structure and store relevant data */ + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info_end = + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) infos + info_size); + for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = infos; + info < info_end; + info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) info + info->Size)) + { + if (info->Relationship != info_type) { + cpuinfo_log_warning( + "unexpected processor info type (%"PRIu32") for processor information", + (uint32_t) info->Relationship); + continue; + } + + const uint32_t info_id = nr_of_structs++; + + switch(info_type) { + case RelationProcessorPackage: + result = parse_relation_processor_info( + processors, + number_of_processors, + global_proc_index_per_group, + info, + info_id, + cores, + chip_info); + break; + case RelationProcessorCore: + result = parse_relation_processor_info( + processors, + number_of_processors, + global_proc_index_per_group, + info, + info_id, + cores, + chip_info); + break; + case RelationCache: + result = parse_relation_cache_info( + processors, + caches, + numbers_of_caches, + global_proc_index_per_group, + info); + break; + default: + cpuinfo_log_error( + "unexpected processor info type (%"PRIu32") for processor information", + (uint32_t) info->Relationship); + result = false; + break; + } + if (!result) { + nr_of_structs = 0; + goto clean_up; + } + } +clean_up: + /* 5. Release dynamically allocated info structure. 
*/ + HeapFree(heap, 0, infos); + infos = NULL; + return nr_of_structs; +} + +static bool parse_relation_processor_info( + struct cpuinfo_processor* processors, + uint32_t nr_of_processors, + const uint32_t* global_proc_index_per_group, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info, + const uint32_t info_id, + struct cpuinfo_core* cores, + const struct woa_chip_info *chip_info) +{ + for (uint32_t i = 0; i < info->Processor.GroupCount; i++) { + const uint32_t group_id = info->Processor.GroupMask[i].Group; + /* Bitmask representing processors in this group belonging to this package */ + KAFFINITY group_processors_mask = info->Processor.GroupMask[i].Mask; + while (group_processors_mask != 0) { + const uint32_t processor_id_in_group = + low_index_from_kaffinity(group_processors_mask); + const uint32_t processor_global_index = + global_proc_index_per_group[group_id] + processor_id_in_group; + + if(processor_global_index >= nr_of_processors) { + cpuinfo_log_error("unexpected processor index %"PRIu32"", + processor_global_index); + return false; + } + + switch(info->Relationship) { + case RelationProcessorPackage: + store_package_info_per_processor( + processors, processor_global_index, info_id, + group_id, processor_id_in_group); + break; + case RelationProcessorCore: + store_core_info_per_processor( + processors, processor_global_index, + info_id, info, + cores, chip_info); + break; + default: + cpuinfo_log_error( + "unexpected processor info type (%"PRIu32") for processor information", + (uint32_t) info->Relationship); + break; + } + /* Clear the bits in affinity mask, lower the least set bit. 
*/ + group_processors_mask &= (group_processors_mask - 1); + } + } + return true; +} + +/* Process one RelationCache record. + * When caches is NULL, only count the record into numbers_of_caches + * (L1 instruction, L1 data, L2 and L3 records are counted). + * Otherwise fill the next free entry for the record's cache kind and attach + * it to every processor set in the record's group masks. + * Records of a kind that is not counted are skipped. Always returns true. + */ +static bool parse_relation_cache_info( + struct cpuinfo_processor* processors, + struct cpuinfo_cache* caches, + uint32_t* numbers_of_caches, + const uint32_t* global_proc_index_per_group, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info) +{ + static uint32_t l1i_counter = 0; + static uint32_t l1d_counter = 0; + static uint32_t l2_counter = 0; + static uint32_t l3_counter = 0; + + /* Count cache types for allocation at first. */ + if (caches == NULL) { + switch(info->Cache.Level) { + case 1: + switch (info->Cache.Type) { + case CacheInstruction: + numbers_of_caches[cpuinfo_cache_level_1i]++; + break; + case CacheData: + numbers_of_caches[cpuinfo_cache_level_1d]++; + break; + case CacheUnified: + break; + case CacheTrace: + break; + default: + break; + } + break; + case 2: + numbers_of_caches[cpuinfo_cache_level_2]++; + break; + case 3: + numbers_of_caches[cpuinfo_cache_level_3]++; + break; + } + return true; + } + struct cpuinfo_cache* l1i_base = caches; + struct cpuinfo_cache* l1d_base = l1i_base + numbers_of_caches[cpuinfo_cache_level_1i]; + struct cpuinfo_cache* l2_base = l1d_base + numbers_of_caches[cpuinfo_cache_level_1d]; + struct cpuinfo_cache* l3_base = l2_base + numbers_of_caches[cpuinfo_cache_level_2]; + + cpuinfo_log_debug( + "info->Cache.GroupCount:%"PRIu32", info->Cache.GroupMask:%"PRIu32"," + "info->Cache.Level:%"PRIu32", info->Cache.Associativity:%"PRIu32"," + "info->Cache.LineSize:%"PRIu32"," + "info->Cache.CacheSize:%"PRIu32", info->Cache.Type:%"PRIu32"", + info->Cache.GroupCount, (unsigned int)info->Cache.GroupMask.Mask, + info->Cache.Level, info->Cache.Associativity, info->Cache.LineSize, + info->Cache.CacheSize, info->Cache.Type); + + struct cpuinfo_cache* current_cache = NULL; + switch (info->Cache.Level) { + case 1: + switch (info->Cache.Type) { + case CacheInstruction: + current_cache = l1i_base + l1i_counter; + l1i_counter++; + break; + case CacheData: + 
current_cache = l1d_base + l1d_counter; + l1d_counter++; + break; + case CacheUnified: + break; + case CacheTrace: + break; + default: + break; + } + break; + case 2: + current_cache = l2_base + l2_counter; + l2_counter++; + break; + case 3: + current_cache = l3_base + l3_counter; + l3_counter++; + break; + } + if (current_cache == NULL) { + /* Level-1 unified/trace caches and levels above 3 are not counted in the + * sizing pass, so no entry was reserved for them; skip the record. + */ + return true; + } + current_cache->size = info->Cache.CacheSize; + current_cache->line_size = info->Cache.LineSize; + current_cache->associativity = info->Cache.Associativity; + /* We don't have partition and set information of caches on Windows, + * so we set partitions to 1 and calculate the expected sets. + */ + current_cache->partitions = 1; + current_cache->sets = + current_cache->size / current_cache->line_size / current_cache->associativity; + if (info->Cache.Type == CacheUnified) { + current_cache->flags = CPUINFO_CACHE_UNIFIED; + } + + for (uint32_t i = 0; i < info->Cache.GroupCount; i++) { + /* Zero GroupCount is valid, GroupMask still can store bits set. */ + const uint32_t group_id = info->Cache.GroupMasks[i].Group; + /* Bitmask representing processors in this group belonging to this package */ + KAFFINITY group_processors_mask = info->Cache.GroupMasks[i].Mask; + while (group_processors_mask != 0) { + const uint32_t processor_id_in_group = + low_index_from_kaffinity(group_processors_mask); + const uint32_t processor_global_index = + global_proc_index_per_group[group_id] + processor_id_in_group; + + store_cache_info_per_processor( + processors, processor_global_index, + info, current_cache); + + /* Clear the bits in affinity mask, lower the least set bit. 
*/ + group_processors_mask &= (group_processors_mask - 1); + } + } + return true; +} + +static void store_package_info_per_processor( + struct cpuinfo_processor* processors, + const uint32_t processor_global_index, + const uint32_t package_id, + const uint32_t group_id, + const uint32_t processor_id_in_group) +{ + processors[processor_global_index].windows_group_id = + (uint16_t) group_id; + processors[processor_global_index].windows_processor_id = + (uint16_t) processor_id_in_group; + + /* As we're counting the number of packages now, we haven't allocated memory for + * cpuinfo_packages yet, so we only set the package pointer's offset now. + */ + processors[processor_global_index].package = + (const struct cpuinfo_package*) NULL + package_id; +} + +void store_core_info_per_processor( + struct cpuinfo_processor* processors, + const uint32_t processor_global_index, + const uint32_t core_id, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info, + struct cpuinfo_core* cores, + const struct woa_chip_info *chip_info) +{ + if (cores) { + processors[processor_global_index].core = cores + core_id; + cores[core_id].core_id = core_id; + get_core_uarch_for_efficiency( + chip_info->chip_name, core_info->Processor.EfficiencyClass, + &(cores[core_id].uarch), &(cores[core_id].frequency)); + + /* We don't have cluster information, so we handle it as + * fixed 1 to (cluster / cores). + * Set the cluster offset ID now, as soon as we have the + * cluster base address, we'll set the absolute address. 
+ */ + processors[processor_global_index].cluster = + (const struct cpuinfo_cluster*) NULL + core_id; + } +} + +static void store_cache_info_per_processor( + struct cpuinfo_processor* processors, + const uint32_t processor_global_index, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info, + struct cpuinfo_cache* current_cache) +{ + if (current_cache->processor_start > processor_global_index) { + current_cache->processor_start = processor_global_index; + } + current_cache->processor_count++; + + switch(info->Cache.Level) { + case 1: + switch (info->Cache.Type) { + case CacheInstruction: + processors[processor_global_index].cache.l1i = current_cache; + break; + case CacheData: + processors[processor_global_index].cache.l1d = current_cache; + break; + case CacheUnified: + break; + case CacheTrace: + break; + default: + break; + } + break; + case 2: + processors[processor_global_index].cache.l2 = current_cache; + break; + case 3: + processors[processor_global_index].cache.l3 = current_cache; + break; + } +} + +static bool connect_packages_cores_clusters_by_processors( + struct cpuinfo_processor* processors, + const uint32_t nr_of_processors, + struct cpuinfo_package* packages, + const uint32_t nr_of_packages, + struct cpuinfo_cluster* clusters, + struct cpuinfo_core* cores, + const uint32_t nr_of_cores, + const struct woa_chip_info* chip_info, + enum cpuinfo_vendor vendor) +{ + /* Adjust core and package pointers for all logical processors. */ + for (uint32_t i = nr_of_processors; i != 0; i--) { + const uint32_t processor_id = i - 1; + struct cpuinfo_processor* processor = processors + processor_id; + + struct cpuinfo_core* core = (struct cpuinfo_core*)processor->core; + + /* We stored the offset of pointers when we haven't allocated memory + * for packages and clusters, so now add offsets to base addresses. 
+ */ + struct cpuinfo_package* package = + (struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package); + if (package < packages || + package >= (packages + nr_of_packages)) { + cpuinfo_log_error("invalid package indexing"); + return false; + } + processor->package = package; + + struct cpuinfo_cluster* cluster = + (struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster); + if (cluster < clusters || + cluster >= (clusters + nr_of_cores)) { + cpuinfo_log_error("invalid cluster indexing"); + return false; + } + processor->cluster = cluster; + + if (chip_info) { + size_t converted_chars = 0; + if (!WideCharToMultiByte( + CP_UTF8, + WC_ERR_INVALID_CHARS, + chip_info->chip_name_string, + -1, + package->name, + CPUINFO_PACKAGE_NAME_MAX, + NULL, + NULL)) { + cpuinfo_log_error("cpu name character conversion error"); + return false; + }; + } + + /* Set start indexes and counts per packages / clusters / cores - going backwards */ + + /* This can be overwritten by lower-index processors on the same package. */ + package->processor_start = processor_id; + package->processor_count++; + + /* This can be overwritten by lower-index processors on the same cluster. */ + cluster->processor_start = processor_id; + cluster->processor_count++; + + /* This can be overwritten by lower-index processors on the same core. 
*/ + core->processor_start = processor_id; + core->processor_count++; + } + /* Fill cores */ + for (uint32_t i = nr_of_cores; i != 0; i--) { + const uint32_t global_core_id = i - 1; + struct cpuinfo_core* core = cores + global_core_id; + const struct cpuinfo_processor* processor = processors + core->processor_start; + struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package; + struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster; + + core->package = package; + core->cluster = cluster; + core->vendor = vendor; + + /* This can be overwritten by lower-index cores on the same cluster/package. */ + cluster->core_start = global_core_id; + cluster->core_count++; + package->core_start = global_core_id; + package->core_count++; + package->cluster_start = global_core_id; + package->cluster_count = package->core_count; + + cluster->package = package; + cluster->vendor = cores[cluster->core_start].vendor; + cluster->uarch = cores[cluster->core_start].uarch; + cluster->frequency = cores[cluster->core_start].frequency; + } + return true; +} + +static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) { + unsigned long index; + _BitScanForward64(&index, (unsigned __int64) kaffinity); + return (uint32_t) index; +} diff --git a/3rdparty/cpuinfo/src/arm/windows/init.c b/3rdparty/cpuinfo/src/arm/windows/init.c new file mode 100644 index 0000000000000..cff89196bf6f1 --- /dev/null +++ b/3rdparty/cpuinfo/src/arm/windows/init.c @@ -0,0 +1,243 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include "windows-arm-init.h" + +struct cpuinfo_arm_isa cpuinfo_isa; + +static void set_cpuinfo_isa_fields(void); +static struct woa_chip_info* get_system_info_from_registry(void); + +static struct woa_chip_info woa_chip_unknown = { + L"Unknown", + woa_chip_name_unknown, + { + { + cpuinfo_vendor_unknown, + cpuinfo_uarch_unknown, + 0 + } + } +}; + +/* Please add new SoC/chip info here! 
*/ +static struct woa_chip_info woa_chips[] = { + /* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */ + { + L"Microsoft SQ1", + woa_chip_name_microsoft_sq_1, + { + { + cpuinfo_vendor_arm, + cpuinfo_uarch_cortex_a55, + 1800000000, + }, + { + cpuinfo_vendor_arm, + cpuinfo_uarch_cortex_a76, + 3000000000, + } + } + }, + /* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */ + { + L"Microsoft SQ2", + woa_chip_name_microsoft_sq_2, + { + { + cpuinfo_vendor_arm, + cpuinfo_uarch_cortex_a55, + 2420000000, + }, + { + cpuinfo_vendor_arm, + cpuinfo_uarch_cortex_a76, + 3150000000 + } + } + }, + /* Microsoft Windows Dev Kit 2023 */ + { + L"Snapdragon Compute Platform", + woa_chip_name_microsoft_sq_3, + { + { + cpuinfo_vendor_arm, + cpuinfo_uarch_cortex_a78, + 2420000000, + }, + { + cpuinfo_vendor_arm, + cpuinfo_uarch_cortex_x1, + 3000000000 + } + } + }, + /* Ampere Altra */ + { + L"Ampere(R) Altra(R) Processor", + woa_chip_name_ampere_altra, + { + { + cpuinfo_vendor_arm, + cpuinfo_uarch_neoverse_n1, + 3000000000 + } + } + } +}; + +BOOL CALLBACK cpuinfo_arm_windows_init( + PINIT_ONCE init_once, PVOID parameter, PVOID* context) +{ + struct woa_chip_info *chip_info = NULL; + enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown; + + set_cpuinfo_isa_fields(); + + chip_info = get_system_info_from_registry(); + if (chip_info == NULL) { + chip_info = &woa_chip_unknown; + } + + cpuinfo_is_initialized = cpu_info_init_by_logical_sys_info(chip_info, chip_info->uarchs[0].vendor); + + return true; +} + +bool get_core_uarch_for_efficiency( + enum woa_chip_name chip, BYTE EfficiencyClass, + enum cpuinfo_uarch* uarch, uint64_t* frequency) +{ + /* For currently supported WoA chips, the Efficiency class selects + * the pre-defined little and big core. + * Any further supported SoC's logic should be implemented here. 
+ */ + if (uarch && frequency && chip < woa_chip_name_last && + EfficiencyClass < MAX_WOA_VALID_EFFICIENCY_CLASSES) { + *uarch = woa_chips[chip].uarchs[EfficiencyClass].uarch; + *frequency = woa_chips[chip].uarchs[EfficiencyClass].frequency; + return true; + } + return false; +} + +/* Static helper functions */ + +static wchar_t* read_registry( + LPCWSTR subkey, + LPCWSTR value) +{ + DWORD key_type = 0; + DWORD data_size = 0; + const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */ + wchar_t *text_buffer = NULL; + LSTATUS result = 0; + HANDLE heap = GetProcessHeap(); + + result = RegGetValueW( + HKEY_LOCAL_MACHINE, + subkey, + value, + flags, + &key_type, + NULL, /* Request buffer size */ + &data_size); + if (result != 0 || data_size == 0) { + cpuinfo_log_error("Registry entry size read error"); + return NULL; + } + + text_buffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, data_size); + if (text_buffer == NULL) { + cpuinfo_log_error("Registry textbuffer allocation error"); + return NULL; + } + + result = RegGetValueW( + HKEY_LOCAL_MACHINE, + subkey, + value, + flags, + NULL, + text_buffer, /* Write string in this destination buffer */ + &data_size); + if (result != 0) { + cpuinfo_log_error("Registry read error"); + HeapFree(heap, 0, text_buffer); + return NULL; + } + return text_buffer; +} + +/* Read the ProcessorNameString value of CPU 0 from the registry and return the + * woa_chips entry whose name is a prefix of it. + * Returns NULL when the registry read fails or no entry matches. + */ +static struct woa_chip_info* get_system_info_from_registry(void) +{ + wchar_t* text_buffer = NULL; + LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; + LPCWSTR chip_name_value = L"ProcessorNameString"; + struct woa_chip_info* chip_info = NULL; + + HANDLE heap = GetProcessHeap(); + + /* Read processor model name from registry and find in the hard-coded list. 
*/ + text_buffer = read_registry(cpu0_subkey, chip_name_value); + if (text_buffer == NULL) { + cpuinfo_log_error("Registry read error"); + return NULL; + } + for (uint32_t i = 0; i < (uint32_t) woa_chip_name_last; i++) { + size_t compare_length = wcsnlen(woa_chips[i].chip_name_string, CPUINFO_PACKAGE_NAME_MAX); + int compare_result = wcsncmp(text_buffer, woa_chips[i].chip_name_string, compare_length); + if (compare_result == 0) { + chip_info = woa_chips+i; + break; + } + } + if (chip_info == NULL) { + /* No match was found: log it and return NULL. */ + cpuinfo_log_error("Unknown chip model name '%ls'.\nPlease add new Windows on Arm SoC/chip support to arm/windows/init.c!", text_buffer); + } else { + cpuinfo_log_debug("detected chip model name: %ls", chip_info->chip_name_string); + } + + HeapFree(heap, 0, text_buffer); + return chip_info; +} + +static void set_cpuinfo_isa_fields(void) +{ + cpuinfo_isa.atomics = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != 0; + + const bool dotprod = IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.dot = dotprod; + + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + switch (system_info.wProcessorLevel) { + case 0x803: // Kryo 385 Silver (Snapdragon 850) + cpuinfo_isa.fp16arith = dotprod; + cpuinfo_isa.rdm = dotprod; + break; + default: + // Assume that Dot Product support implies FP16 arithmetics and RDM support. + // ARM manuals don't guarantee that, but it holds in practice. + cpuinfo_isa.fp16arith = dotprod; + cpuinfo_isa.rdm = dotprod; + break; + } + + /* Windows API reports all or nothing for cryptographic instructions. 
*/ + const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.aes = crypto; + cpuinfo_isa.sha1 = crypto; + cpuinfo_isa.sha2 = crypto; + cpuinfo_isa.pmull = crypto; + + cpuinfo_isa.crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; +} diff --git a/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h b/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h new file mode 100644 index 0000000000000..e327431bd9aca --- /dev/null +++ b/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h @@ -0,0 +1,38 @@ +#pragma once + +/* Efficiency class = 0 means little core, while 1 means big core for now. */ +#define MAX_WOA_VALID_EFFICIENCY_CLASSES 2 + +/* List of known and supported Windows on Arm SoCs/chips. */ +enum woa_chip_name { + woa_chip_name_microsoft_sq_1 = 0, + woa_chip_name_microsoft_sq_2 = 1, + woa_chip_name_microsoft_sq_3 = 2, + woa_chip_name_ampere_altra = 3, + woa_chip_name_unknown = 4, + woa_chip_name_last = woa_chip_name_unknown +}; + +/* Topology information hard-coded by SoC/chip name */ +struct core_info_by_chip_name { + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; + uint64_t frequency; /* Hz */ +}; + +/* SoC/chip info that's currently not readable by logical system information, + * but can be read from registry. 
+ */ +struct woa_chip_info { + wchar_t* chip_name_string; + enum woa_chip_name chip_name; + struct core_info_by_chip_name uarchs[MAX_WOA_VALID_EFFICIENCY_CLASSES]; +}; + +bool get_core_uarch_for_efficiency( + enum woa_chip_name chip, BYTE EfficiencyClass, + enum cpuinfo_uarch* uarch, uint64_t* frequency); + +bool cpu_info_init_by_logical_sys_info( + const struct woa_chip_info *chip_info, + enum cpuinfo_vendor vendor); diff --git a/3rdparty/cpuinfo/src/cpuinfo/internal-api.h b/3rdparty/cpuinfo/src/cpuinfo/internal-api.h index 9c23d7c8b73b8..69a9ec984deb5 100644 --- a/3rdparty/cpuinfo/src/cpuinfo/internal-api.h +++ b/3rdparty/cpuinfo/src/cpuinfo/internal-api.h @@ -35,7 +35,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs; extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count; #else @@ -51,10 +51,15 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); #if defined(_WIN32) || defined(__CYGWIN__) - CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #if CPUINFO_ARCH_ARM64 + CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #else + CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #endif #endif CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void); CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void); +CPUINFO_PRIVATE void cpuinfo_riscv_linux_init(void); CPUINFO_PRIVATE void cpuinfo_emscripten_init(void); CPUINFO_PRIVATE uint32_t 
cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor); diff --git a/3rdparty/cpuinfo/src/cpuinfo/log.h b/3rdparty/cpuinfo/src/cpuinfo/log.h index dac8cdb663dc2..5bd43055658f0 100644 --- a/3rdparty/cpuinfo/src/cpuinfo/log.h +++ b/3rdparty/cpuinfo/src/cpuinfo/log.h @@ -1,17 +1,103 @@ #pragma once #include +#include +#include -#include +#ifndef CPUINFO_LOG_LEVEL + #error "Undefined CPUINFO_LOG_LEVEL" +#endif -#define CPUINFO_LOG_DEBUG_PARSERS 0 +#define CPUINFO_LOG_NONE 0 +#define CPUINFO_LOG_FATAL 1 +#define CPUINFO_LOG_ERROR 2 +#define CPUINFO_LOG_WARNING 3 +#define CPUINFO_LOG_INFO 4 +#define CPUINFO_LOG_DEBUG 5 -#ifndef CPUINFO_LOG_LEVEL - #define CPUINFO_LOG_LEVEL CLOG_ERROR +#ifndef CPUINFO_LOG_DEBUG_PARSERS + #define CPUINFO_LOG_DEBUG_PARSERS 0 +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_DEBUG + void cpuinfo_vlog_debug(const char* format, va_list args); +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_INFO + void cpuinfo_vlog_info(const char* format, va_list args); +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_WARNING + void cpuinfo_vlog_warning(const char* format, va_list args); #endif -CLOG_DEFINE_LOG_DEBUG(cpuinfo_log_debug, "cpuinfo", CPUINFO_LOG_LEVEL); -CLOG_DEFINE_LOG_INFO(cpuinfo_log_info, "cpuinfo", CPUINFO_LOG_LEVEL); -CLOG_DEFINE_LOG_WARNING(cpuinfo_log_warning, "cpuinfo", CPUINFO_LOG_LEVEL); -CLOG_DEFINE_LOG_ERROR(cpuinfo_log_error, "cpuinfo", CPUINFO_LOG_LEVEL); -CLOG_DEFINE_LOG_FATAL(cpuinfo_log_fatal, "cpuinfo", CPUINFO_LOG_LEVEL); +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_ERROR + void cpuinfo_vlog_error(const char* format, va_list args); +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_FATAL + void cpuinfo_vlog_fatal(const char* format, va_list args); +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#ifndef CPUINFO_LOG_ARGUMENTS_FORMAT + #ifdef __GNUC__ + #define CPUINFO_LOG_ARGUMENTS_FORMAT __attribute__((__format__(__printf__, 1, 2))) + #else + #define 
CPUINFO_LOG_ARGUMENTS_FORMAT + #endif +#endif + +CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_debug(const char* format, ...) { + #if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_DEBUG + va_list args; + va_start(args, format); + cpuinfo_vlog_debug(format, args); + va_end(args); + #endif +} + +CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_info(const char* format, ...) { + #if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_INFO + va_list args; + va_start(args, format); + cpuinfo_vlog_info(format, args); + va_end(args); + #endif +} + +CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_warning(const char* format, ...) { + #if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_WARNING + va_list args; + va_start(args, format); + cpuinfo_vlog_warning(format, args); + va_end(args); + #endif +} + +CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_error(const char* format, ...) { + #if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_ERROR + va_list args; + va_start(args, format); + cpuinfo_vlog_error(format, args); + va_end(args); + #endif +} + +CPUINFO_LOG_ARGUMENTS_FORMAT inline static void cpuinfo_log_fatal(const char* format, ...) 
{ + #if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_FATAL + va_list args; + va_start(args, format); + cpuinfo_vlog_fatal(format, args); + va_end(args); + #endif + abort(); +} \ No newline at end of file diff --git a/3rdparty/cpuinfo/src/cpuinfo/utils.h b/3rdparty/cpuinfo/src/cpuinfo/utils.h index d2aa731ebd3cf..6cfaca7bb4c08 100644 --- a/3rdparty/cpuinfo/src/cpuinfo/utils.h +++ b/3rdparty/cpuinfo/src/cpuinfo/utils.h @@ -1,10 +1,9 @@ #pragma once -#include - #ifdef _MSC_VER -#include // _BitScanReverse +#include #endif +#include inline static uint32_t bit_length(uint32_t n) { diff --git a/3rdparty/cpuinfo/src/init.c b/3rdparty/cpuinfo/src/init.c index d61e7be6eba7a..5748271568103 100644 --- a/3rdparty/cpuinfo/src/init.c +++ b/3rdparty/cpuinfo/src/init.c @@ -37,6 +37,14 @@ bool CPUINFO_ABI cpuinfo_initialize(void) { pthread_once(&init_guard, &cpuinfo_arm_linux_init); #elif defined(__MACH__) && defined(__APPLE__) pthread_once(&init_guard, &cpuinfo_arm_mach_init); + #elif defined(_WIN32) + InitOnceExecuteOnce(&init_guard, &cpuinfo_arm_windows_init, NULL, NULL); + #else + cpuinfo_log_error("operating system is not supported in cpuinfo"); + #endif +#elif CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + #if defined(__linux__) + pthread_once(&init_guard, &cpuinfo_riscv_linux_init); #else cpuinfo_log_error("operating system is not supported in cpuinfo"); #endif diff --git a/3rdparty/cpuinfo/src/linux/api.h b/3rdparty/cpuinfo/src/linux/api.h index f55b8ac73ffe5..d33cbd7dc7fea 100644 --- a/3rdparty/cpuinfo/src/linux/api.h +++ b/3rdparty/cpuinfo/src/linux/api.h @@ -21,7 +21,8 @@ #define CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER UINT32_C(0x00000400) #define CPUINFO_LINUX_FLAG_PROC_CPUINFO UINT32_C(0x00000800) #define CPUINFO_LINUX_FLAG_VALID UINT32_C(0x00001000) - +#define CPUINFO_LINUX_FLAG_CUR_FREQUENCY UINT32_C(0x00002000) +#define CPUINFO_LINUX_FLAG_CLUSTER_CLUSTER UINT32_C(0x00004000) typedef bool (*cpuinfo_cpulist_callback)(uint32_t, uint32_t, void*); CPUINFO_INTERNAL bool 
cpuinfo_linux_parse_cpulist(const char* filename, cpuinfo_cpulist_callback callback, void* context); @@ -33,6 +34,7 @@ CPUINFO_INTERNAL bool cpuinfo_linux_parse_multiline_file(const char* filename, s CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_processors_count(void); CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count); CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count); +CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_cur_frequency(uint32_t processor); CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_min_frequency(uint32_t processor); CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor); CPUINFO_INTERNAL bool cpuinfo_linux_get_processor_package_id(uint32_t processor, uint32_t package_id[restrict static 1]); @@ -54,6 +56,21 @@ CPUINFO_INTERNAL bool cpuinfo_linux_detect_thread_siblings( uint32_t processor, cpuinfo_siblings_callback callback, void* context); +CPUINFO_INTERNAL bool cpuinfo_linux_detect_cluster_cpus( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context); +CPUINFO_INTERNAL bool cpuinfo_linux_detect_core_cpus( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context); +CPUINFO_INTERNAL bool cpuinfo_linux_detect_package_cpus( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context); extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map; extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map; diff --git a/3rdparty/cpuinfo/src/linux/processors.c b/3rdparty/cpuinfo/src/linux/processors.c index aedba743823b4..246b4a2eed7ae 100644 --- a/3rdparty/cpuinfo/src/linux/processors.c +++ b/3rdparty/cpuinfo/src/linux/processors.c @@ -21,6 +21,7 @@ #define KERNEL_MAX_FILENAME 
"/sys/devices/system/cpu/kernel_max" #define KERNEL_MAX_FILESIZE 32 #define FREQUENCY_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/cpufreq/cpuinfo_max_freq")) +#define CUR_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_cur_freq" #define MAX_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_max_freq" #define MIN_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_min_freq" #define FREQUENCY_FILESIZE 32 @@ -31,8 +32,14 @@ #define CORE_ID_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_id" #define CORE_ID_FILESIZE 32 +#define CORE_CPUS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_cpus_list")) +#define CORE_CPUS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_cpus_list" #define CORE_SIBLINGS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_siblings_list")) #define CORE_SIBLINGS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_siblings_list" +#define CLUSTER_CPUS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/cluster_cpus_list")) +#define CLUSTER_CPUS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/cluster_cpus_list" +#define PACKAGE_CPUS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/package_cpus_list")) +#define PACKAGE_CPUS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/package_cpus_list" #define THREAD_SIBLINGS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/thread_siblings_list")) #define THREAD_SIBLINGS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/thread_siblings_list" @@ -125,6 +132,27 @@ uint32_t cpuinfo_linux_get_max_processors_count(void) { } } +uint32_t cpuinfo_linux_get_processor_cur_frequency(uint32_t processor) { + 
char cur_frequency_filename[FREQUENCY_FILENAME_SIZE]; + const int chars_formatted = snprintf( + cur_frequency_filename, FREQUENCY_FILENAME_SIZE, CUR_FREQUENCY_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= FREQUENCY_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for current frequency of processor %"PRIu32, processor); + return 0; + } + + uint32_t cur_frequency; + if (cpuinfo_linux_parse_small_file(cur_frequency_filename, FREQUENCY_FILESIZE, uint32_parser, &cur_frequency)) { + cpuinfo_log_debug("parsed currrent frequency value of %"PRIu32" KHz for logical processor %"PRIu32" from %s", + cur_frequency, processor, cur_frequency_filename); + return cur_frequency; + } else { + cpuinfo_log_warning("failed to parse current frequency for processor %"PRIu32" from %s", + processor, cur_frequency_filename); + return 0; + } +} + uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor) { char max_frequency_filename[FREQUENCY_FILENAME_SIZE]; const int chars_formatted = snprintf( @@ -285,8 +313,7 @@ static bool detect_processor_parser(uint32_t processor_list_start, uint32_t proc } bool cpuinfo_linux_detect_possible_processors(uint32_t max_processors_count, - uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t possible_flag) -{ + uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t possible_flag) { struct detect_processors_context context = { .max_processors_count = max_processors_count, .processor0_flags = processor0_flags, @@ -302,8 +329,7 @@ bool cpuinfo_linux_detect_possible_processors(uint32_t max_processors_count, } bool cpuinfo_linux_detect_present_processors(uint32_t max_processors_count, - uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t present_flag) -{ + uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t present_flag) { struct detect_processors_context context = { .max_processors_count = max_processors_count, .processor0_flags = processor0_flags, 
@@ -340,12 +366,41 @@ static bool siblings_parser(uint32_t sibling_list_start, uint32_t sibling_list_e return context->callback(processor, sibling_list_start, sibling_list_end, context->callback_context); } +bool cpuinfo_linux_detect_core_cpus( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context) { + char core_cpus_filename[CORE_CPUS_FILENAME_SIZE]; + const int chars_formatted = snprintf( + core_cpus_filename, CORE_CPUS_FILENAME_SIZE, CORE_CPUS_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= CORE_CPUS_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for core cpus of processor %"PRIu32, processor); + return false; + } + + struct siblings_context siblings_context = { + .group_name = "cpus", + .max_processors_count = max_processors_count, + .processor = processor, + .callback = callback, + .callback_context = context, + }; + if (cpuinfo_linux_parse_cpulist(core_cpus_filename, + (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) { + return true; + } else { + cpuinfo_log_info("failed to parse the list of core cpus for processor %"PRIu32" from %s", + processor, core_cpus_filename); + return false; + } +} + bool cpuinfo_linux_detect_core_siblings( uint32_t max_processors_count, uint32_t processor, cpuinfo_siblings_callback callback, - void* context) -{ + void* context) { char core_siblings_filename[CORE_SIBLINGS_FILENAME_SIZE]; const int chars_formatted = snprintf( core_siblings_filename, CORE_SIBLINGS_FILENAME_SIZE, CORE_SIBLINGS_FILENAME_FORMAT, processor); @@ -362,8 +417,7 @@ bool cpuinfo_linux_detect_core_siblings( .callback_context = context, }; if (cpuinfo_linux_parse_cpulist(core_siblings_filename, - (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) - { + (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) { return true; } else { cpuinfo_log_info("failed to parse the list of core siblings for processor %"PRIu32" from %s", @@ -376,8 
+430,7 @@ bool cpuinfo_linux_detect_thread_siblings( uint32_t max_processors_count, uint32_t processor, cpuinfo_siblings_callback callback, - void* context) -{ + void* context) { char thread_siblings_filename[THREAD_SIBLINGS_FILENAME_SIZE]; const int chars_formatted = snprintf( thread_siblings_filename, THREAD_SIBLINGS_FILENAME_SIZE, THREAD_SIBLINGS_FILENAME_FORMAT, processor); @@ -394,8 +447,7 @@ bool cpuinfo_linux_detect_thread_siblings( .callback_context = context, }; if (cpuinfo_linux_parse_cpulist(thread_siblings_filename, - (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) - { + (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) { return true; } else { cpuinfo_log_info("failed to parse the list of thread siblings for processor %"PRIu32" from %s", @@ -404,3 +456,62 @@ bool cpuinfo_linux_detect_thread_siblings( } } +bool cpuinfo_linux_detect_cluster_cpus( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context) { + char cluster_cpus_filename[CLUSTER_CPUS_FILENAME_SIZE]; + const int chars_formatted = snprintf( + cluster_cpus_filename, CLUSTER_CPUS_FILENAME_SIZE, CLUSTER_CPUS_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= CLUSTER_CPUS_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for cluster cpus of processor %"PRIu32, processor); + return false; + } + + struct siblings_context siblings_context = { + .group_name = "cluster", + .max_processors_count = max_processors_count, + .processor = processor, + .callback = callback, + .callback_context = context, + }; + if (cpuinfo_linux_parse_cpulist(cluster_cpus_filename, + (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) { + return true; + } else { + cpuinfo_log_info("failed to parse the list of cluster cpus for processor %"PRIu32" from %s", + processor, cluster_cpus_filename); + return false; + } +} + +bool cpuinfo_linux_detect_package_cpus( + uint32_t max_processors_count, + uint32_t 
processor, + cpuinfo_siblings_callback callback, + void* context) { + char package_cpus_filename[PACKAGE_CPUS_FILENAME_SIZE]; + const int chars_formatted = snprintf( + package_cpus_filename, PACKAGE_CPUS_FILENAME_SIZE, PACKAGE_CPUS_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= PACKAGE_CPUS_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for package cpus of processor %"PRIu32, processor); + return false; + } + + struct siblings_context siblings_context = { + .group_name = "package", + .max_processors_count = max_processors_count, + .processor = processor, + .callback = callback, + .callback_context = context, + }; + if (cpuinfo_linux_parse_cpulist(package_cpus_filename, + (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) { + return true; + } else { + cpuinfo_log_info("failed to parse the list of package cpus for processor %"PRIu32" from %s", + processor, package_cpus_filename); + return false; + } +} diff --git a/3rdparty/cpuinfo/src/log.c b/3rdparty/cpuinfo/src/log.c new file mode 100644 index 0000000000000..bec604eeeac59 --- /dev/null +++ b/3rdparty/cpuinfo/src/log.c @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include +#ifdef _WIN32 + #include +#else + #include +#endif +#if defined(__ANDROID__) + #include +#endif +#if defined(__hexagon__) + #include +#endif + +#ifndef CPUINFO_LOG_TO_STDIO + #if defined(__ANDROID__) + #define CPUINFO_LOG_TO_STDIO 0 + #else + #define CPUINFO_LOG_TO_STDIO 1 + #endif +#endif + +#include + + +/* Messages up to this size are formatted entirely on-stack, and don't allocate heap memory */ +#define CPUINFO_LOG_STACK_BUFFER_SIZE 1024 + +#ifdef _WIN32 + #define CPUINFO_LOG_NEWLINE_LENGTH 2 + + #define CPUINFO_LOG_STDERR STD_ERROR_HANDLE + #define CPUINFO_LOG_STDOUT STD_OUTPUT_HANDLE +#elif defined(__hexagon__) + #define CPUINFO_LOG_NEWLINE_LENGTH 1 + + #define CPUINFO_LOG_STDERR 0 + #define CPUINFO_LOG_STDOUT 0 +#else + #define CPUINFO_LOG_NEWLINE_LENGTH 1 + + #define 
CPUINFO_LOG_STDERR STDERR_FILENO + #define CPUINFO_LOG_STDOUT STDOUT_FILENO +#endif + +#if CPUINFO_LOG_TO_STDIO +static void cpuinfo_vlog(int output_handle, const char* prefix, size_t prefix_length, const char* format, va_list args) { + char stack_buffer[CPUINFO_LOG_STACK_BUFFER_SIZE]; + char* heap_buffer = NULL; + char* out_buffer = &stack_buffer[0]; + + /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */ + va_list args_copy; + va_copy(args_copy, args); + + memcpy(stack_buffer, prefix, prefix_length * sizeof(char)); + assert((prefix_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char) <= CPUINFO_LOG_STACK_BUFFER_SIZE); + + const int format_chars = vsnprintf( + &stack_buffer[prefix_length], + CPUINFO_LOG_STACK_BUFFER_SIZE - (prefix_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char), + format, + args); + if (format_chars < 0) { + /* Format error in the message: silently ignore this particular message. */ + goto cleanup; + } + const size_t format_length = (size_t) format_chars; + if ((prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char) > CPUINFO_LOG_STACK_BUFFER_SIZE) { + /* Allocate a buffer on heap, and vsnprintf to this buffer */ + const size_t heap_buffer_size = (prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char); + #if _WIN32 + heap_buffer = HeapAlloc(GetProcessHeap(), 0, heap_buffer_size); + #else + heap_buffer = malloc(heap_buffer_size); + #endif + if (heap_buffer == NULL) { + goto cleanup; + } + + /* Copy pre-formatted prefix into the on-heap buffer */ + memcpy(heap_buffer, prefix, prefix_length * sizeof(char)); + vsnprintf(&heap_buffer[prefix_length], (format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char), format, args_copy); + out_buffer = heap_buffer; + } + #ifdef _WIN32 + out_buffer[prefix_length + format_length] = '\r'; + out_buffer[prefix_length + format_length + 1] = '\n'; + + DWORD bytes_written; + WriteFile( + GetStdHandle((DWORD) 
output_handle), + out_buffer, (prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char), + &bytes_written, NULL); + #elif defined(__hexagon__) + qurt_printf("%s", out_buffer); + #else + out_buffer[prefix_length + format_length] = '\n'; + + ssize_t bytes_written = write(output_handle, out_buffer, (prefix_length + format_length + CPUINFO_LOG_NEWLINE_LENGTH) * sizeof(char)); + (void) bytes_written; + #endif + +cleanup: + #ifdef _WIN32 + HeapFree(GetProcessHeap(), 0, heap_buffer); + #else + free(heap_buffer); + #endif + va_end(args_copy); +} +#elif defined(__ANDROID__) && CPUINFO_LOG_LEVEL > CPUINFO_LOG_NONE + static const char cpuinfo_module[] = "XNNPACK"; +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_DEBUG + void cpuinfo_vlog_debug(const char* format, va_list args) { + #if CPUINFO_LOG_TO_STDIO + static const char debug_prefix[17] = { + 'D', 'e', 'b', 'u', 'g', ' ', '(', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ')', ':', ' ' + }; + cpuinfo_vlog(CPUINFO_LOG_STDOUT, debug_prefix, 17, format, args); + #elif defined(__ANDROID__) + __android_log_vprint(ANDROID_LOG_DEBUG, cpuinfo_module, format, args); + #else + #error "Platform-specific implementation required" + #endif + } +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_INFO + void cpuinfo_vlog_info(const char* format, va_list args) { + #if CPUINFO_LOG_TO_STDIO + static const char info_prefix[16] = { + 'N', 'o', 't', 'e', ' ', '(', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ')', ':', ' ' + }; + cpuinfo_vlog(CPUINFO_LOG_STDOUT, info_prefix, 16, format, args); + #elif defined(__ANDROID__) + __android_log_vprint(ANDROID_LOG_INFO, cpuinfo_module, format, args); + #else + #error "Platform-specific implementation required" + #endif + } +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_WARNING + void cpuinfo_vlog_warning(const char* format, va_list args) { + #if CPUINFO_LOG_TO_STDIO + static const char warning_prefix[20] = { + 'W', 'a', 'r', 'n', 'i', 'n', 'g', ' ', 'i', 'n', ' ', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ':', ' ' 
+ }; + cpuinfo_vlog(CPUINFO_LOG_STDERR, warning_prefix, 20, format, args); + #elif defined(__ANDROID__) + __android_log_vprint(ANDROID_LOG_WARN, cpuinfo_module, format, args); + #else + #error "Platform-specific implementation required" + #endif + } +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_ERROR + void cpuinfo_vlog_error(const char* format, va_list args) { + #if CPUINFO_LOG_TO_STDIO + static const char error_prefix[18] = { + 'E', 'r', 'r', 'o', 'r', ' ', 'i', 'n', ' ', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ':', ' ' + }; + cpuinfo_vlog(CPUINFO_LOG_STDERR, error_prefix, 18, format, args); + #elif defined(__ANDROID__) + __android_log_vprint(ANDROID_LOG_ERROR, cpuinfo_module, format, args); + #else + #error "Platform-specific implementation required" + #endif + } +#endif + +#if CPUINFO_LOG_LEVEL >= CPUINFO_LOG_FATAL + void cpuinfo_vlog_fatal(const char* format, va_list args) { + #if CPUINFO_LOG_TO_STDIO + static const char fatal_prefix[24] = { + 'F', 'a', 't', 'a', 'l', ' ', 'e', 'r', 'r', 'o', 'r', ' ', 'i', 'n', ' ', 'c', 'p', 'u', 'i', 'n', 'f', 'o', ':', ' ' + }; + cpuinfo_vlog(CPUINFO_LOG_STDERR, fatal_prefix, 24, format, args); + #elif defined(__ANDROID__) + __android_log_vprint(ANDROID_LOG_FATAL, cpuinfo_module, format, args); + #else + #error "Platform-specific implementation required" + #endif + } +#endif diff --git a/3rdparty/cpuinfo/src/riscv/api.h b/3rdparty/cpuinfo/src/riscv/api.h new file mode 100644 index 0000000000000..cd4bf464ff052 --- /dev/null +++ b/3rdparty/cpuinfo/src/riscv/api.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include + +/* RISC-V Vendor IDs. */ +enum cpuinfo_riscv_chipset_vendor { + cpuinfo_riscv_chipset_vendor_unknown = 0, + cpuinfo_riscv_chipset_sifive = 0x489, + cpuinfo_riscv_chipset_vendor_max, +}; + +/* RISC-V Architecture IDs. */ +enum cpuinfo_riscv_chipset_arch { + cpuinfo_riscv_chipset_arch_unknown = 0, + cpuinfo_riscv_chipset_arch_max, +}; + +/* RISC-V Implementation IDs. 
*/ +enum cpuinfo_riscv_chipset_impl { + cpuinfo_riscv_chipset_impl_unknown = 0, + cpuinfo_riscv_chipset_impl_max, +}; + +/** + * Decodes the vendor and micro-architecture based on the provided input + * parameters, regardless of underlying operating system. + * + * @param[vendor_id]: The 'mvendorid' as described by the RISC-V Manual. + * @param[arch_id]: The 'marchid' as described by the RISC-V Manual. + * @param[imp_id]: The 'mimpid' as described by the RISC-V Manual. + * @param[vendor] - Reference to the cpuinfo_vendor to populate. + * @param[uarch] - Reference to the cpuinfo_uarch to populate. + */ +CPUINFO_INTERNAL void cpuinfo_riscv_decode_vendor_uarch( + uint32_t vendor_id, + uint32_t arch_id, + uint32_t imp_id, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]); diff --git a/3rdparty/cpuinfo/src/riscv/linux/api.h b/3rdparty/cpuinfo/src/riscv/linux/api.h new file mode 100644 index 0000000000000..5f1a8cf359cb3 --- /dev/null +++ b/3rdparty/cpuinfo/src/riscv/linux/api.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include + +/** + * Definition of a RISC-V Linux processor. It is composed of the base processor + * definition in "include/cpuinfo.h" and flags specific to RISC-V Linux + * implementations. + */ +struct cpuinfo_riscv_linux_processor { + /* Public ABI cpuinfo structures. */ + struct cpuinfo_processor processor; + struct cpuinfo_core core; + struct cpuinfo_cluster cluster; + struct cpuinfo_package package; + + /** + * Linux-specific flags for the logical processor: + * - Bit field that can be masked with CPUINFO_LINUX_FLAG_*. + */ + uint32_t flags; + + /** + * Minimum processor ID on the cluster which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same cluster. + */ + uint32_t cluster_leader_id; + + /** + * Minimum processor ID on the core which includes this logical processor.
+ * This value can serve as an ID for the core of logical processors: it + * is the same for all logical processors on the same core. + */ + uint32_t core_leader_id; + + /** + * Minimum processor ID on the package which includes this logical processor. + * This value can serve as an ID for the package of logical processors: it + * is the same for all logical processors on the same package. + */ + uint32_t package_leader_id; +}; + +/** + * Reads AT_HWCAP from `getauxval` and populates the cpuinfo_riscv_isa + * structure. + * + * @param[isa] - Reference to cpuinfo_riscv_isa structure to populate. + */ +CPUINFO_INTERNAL void cpuinfo_riscv_linux_decode_isa_from_hwcap( + struct cpuinfo_riscv_isa isa[restrict static 1]); + +/** + * Reads `sys_riscv_hwprobe` and determines the processor vendor and + * micro-architecture. + * + * @param[processor] - The Linux ID of the target processor. + * @param[vendor] - Reference to the cpuinfo_vendor to populate. + * @param[uarch] - Reference to the cpuinfo_uarch to populate. + */ +CPUINFO_INTERNAL void cpuinfo_riscv_linux_decode_vendor_uarch_from_hwprobe( + uint32_t processor, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]); + +/* Used to determine which uarch is associated with the current thread. */ +extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; diff --git a/3rdparty/cpuinfo/src/riscv/linux/init.c b/3rdparty/cpuinfo/src/riscv/linux/init.c new file mode 100644 index 0000000000000..d1c43c542a616 --- /dev/null +++ b/3rdparty/cpuinfo/src/riscv/linux/init.c @@ -0,0 +1,620 @@ +#include + +#include +#include +#include +#include + +/* ISA structure to hold supported extensions. */ +struct cpuinfo_riscv_isa cpuinfo_isa; + +/* Helper function to bitmask flags and ensure operator precedence. 
*/ +static inline bool bitmask_all(uint32_t flags, uint32_t mask) { + return (flags & mask) == mask; +} + +static int compare_riscv_linux_processors(const void* a, const void* b) { + /** + * For our purposes, it is only relevant that the list is sorted by + * micro-architecture, so the nature of ordering is irrelevant. + */ + return ((const struct cpuinfo_riscv_linux_processor*)a)->core.uarch + - ((const struct cpuinfo_riscv_linux_processor*)b)->core.uarch; +} + +/** + * Parses the core cpus list for each processor. This function is called once + * per-processor, with the IDs of all other processors in the core list. + * + * The 'processor_[start|count]' are populated in the processor's 'core' + * attribute, with 'start' being the smallest ID in the core list. + * + * The 'core_leader_id' of each processor is set to the smallest ID in its + * core CPU list. + * + * Precondition: The element in the 'processors' list must be initialized with + * their 'core_leader_id' to their index in the list. + + * E.g. processors[0].core_leader_id = 0. + */ +static bool core_cpus_parser(uint32_t processor, + uint32_t core_cpus_start, + uint32_t core_cpus_end, + struct cpuinfo_riscv_linux_processor* processors) { + uint32_t processor_start = UINT32_MAX; + uint32_t processor_count = 0; + + /* If the processor already has a leader, use it. */ + if (bitmask_all(processors[processor].flags, CPUINFO_LINUX_FLAG_CORE_CLUSTER)) { + processor_start = processors[processor].core_leader_id; + } + + for (size_t core_cpu = core_cpus_start; core_cpu < core_cpus_end; core_cpu++) { + if (!bitmask_all(processors[core_cpu].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + /** + * The first valid processor observed is the smallest ID in the + * list that attaches to this core.
+ */ + if (processor_start == UINT32_MAX) { + processor_start = core_cpu; + } + processors[core_cpu].core_leader_id = processor_start; + processor_count++; + } + /** + * If the cluster flag has not been set, assign the processor start. If + * it has been set, only apply the processor start if it's less than the + * held value. This can happen if the callback is invoked twice: + * + * e.g. core_cpus_list=1,10-12 + */ + if (!bitmask_all(processors[processor].flags, CPUINFO_LINUX_FLAG_CORE_CLUSTER) + || processors[processor].core.processor_start > processor_start) { + processors[processor].core.processor_start = processor_start; + processors[processor].core_leader_id = processor_start; + } + processors[processor].core.processor_count += processor_count; + processors[processor].flags |= CPUINFO_LINUX_FLAG_CORE_CLUSTER; + /* The parser has failed only if no processors were found. */ + return processor_count != 0; +} + +/** + * Parses the cluster cpu list for each processor. This function is called once + * per-processor, with the IDs of all other processors in the cluster. + * + * The 'cluster_leader_id' of each processor is set to the smallest ID in its + * cluster CPU list. + * + * Precondition: The element in the 'processors' list must be initialized with + * their 'cluster_leader_id' to their index in the list. + * E.g. processors[0].cluster_leader_id = 0. + */ +static bool cluster_cpus_parser(uint32_t processor, + uint32_t cluster_cpus_start, + uint32_t cluster_cpus_end, + struct cpuinfo_riscv_linux_processor* processors) { + uint32_t processor_start = UINT32_MAX; + uint32_t processor_count = 0; + uint32_t core_count = 0; + + /* If the processor already has a leader, use it.
*/ + if (bitmask_all(processors[processor].flags, CPUINFO_LINUX_FLAG_CLUSTER_CLUSTER)) { + processor_start = processors[processor].cluster_leader_id; + } + + for (size_t cluster_cpu = cluster_cpus_start; cluster_cpu < cluster_cpus_end; cluster_cpu++) { + if (!bitmask_all(processors[cluster_cpu].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + /** + * The first valid processor observed is the smallest ID in the + * list that attaches to this cluster. + */ + if (processor_start == UINT32_MAX) { + processor_start = cluster_cpu; + } + processors[cluster_cpu].cluster_leader_id = processor_start; + processor_count++; + /** + * A processor should only represent its core if it is the + * assigned leader of that core. + */ + if (processors[cluster_cpu].core_leader_id == cluster_cpu) { + core_count++; + } + } + /** + * If the cluster flag has not been set, assign the processor start. If + * it has been set, only apply the processor start if it's less than the + * held value. This can happen if the callback is invoked twice: + * + * e.g. cluster_cpus_list=1,10-12 + */ + if (!bitmask_all(processors[processor].flags, CPUINFO_LINUX_FLAG_CLUSTER_CLUSTER) + || processors[processor].cluster.processor_start > processor_start) { + processors[processor].cluster.processor_start = processor_start; + processors[processor].cluster.core_start = processor_start; + processors[processor].cluster.cluster_id = processor_start; + processors[processor].cluster_leader_id = processor_start; + } + processors[processor].cluster.processor_count += processor_count; + processors[processor].cluster.core_count += core_count; + processors[processor].flags |= CPUINFO_LINUX_FLAG_CLUSTER_CLUSTER; + return true; +} + +/** + * Parses the package cpus list for each processor. This function is called once + * per-processor, with the IDs of all other processors in the package list.
+ * + * The 'processor_[start|count]' are populated in the processor's 'package' + * attribute, with 'start' being the smallest ID in the package list. + * + * The 'package_leader_id' of each processor is set to the smallest ID in its + * package CPU list. + * + * Precondition: The element in the 'processors' list must be initialized with + * their 'package_leader_id' to their index in the list. + * E.g. processors[0].package_leader_id = 0. + */ +static bool package_cpus_parser(uint32_t processor, + uint32_t package_cpus_start, + uint32_t package_cpus_end, + struct cpuinfo_riscv_linux_processor* processors) { + uint32_t processor_start = UINT32_MAX; + uint32_t processor_count = 0; + uint32_t cluster_count = 0; + uint32_t core_count = 0; + + /* If the processor already has a leader, use it. */ + if (bitmask_all(processors[processor].flags, CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) { + processor_start = processors[processor].package_leader_id; + } + + for (size_t package_cpu = package_cpus_start; package_cpu < package_cpus_end; package_cpu++) { + if (!bitmask_all(processors[package_cpu].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + /** + * The first valid processor observed is the smallest ID in the + * list that attaches to this package. + */ + if (processor_start == UINT32_MAX) { + processor_start = package_cpu; + } + processors[package_cpu].package_leader_id = processor_start; + processor_count++; + /** + * A processor should only represent its core if it is the + * assigned leader of that core, and similarly for its cluster. + */ + if (processors[package_cpu].cluster_leader_id == package_cpu) { + cluster_count++; + } + if (processors[package_cpu].core_leader_id == package_cpu) { + core_count++; + } + } + /** + * If the cluster flag has not been set, assign the processor start. If + * it has been set, only apply the processor start if it's less than the + * held value. This can happen if the callback is invoked twice: + * + * e.g.
package_cpus_list=1,10-12 + */ + if (!bitmask_all(processors[processor].flags, CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) + || processors[processor].package.processor_start > processor_start) { + processors[processor].package.processor_start = processor_start; + processors[processor].package.cluster_start = processor_start; + processors[processor].package.core_start = processor_start; + processors[processor].package_leader_id = processor_start; + } + processors[processor].package.processor_count += processor_count; + processors[processor].package.cluster_count += cluster_count; + processors[processor].package.core_count += core_count; + processors[processor].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + return true; +} + +/* Initialization for the RISC-V Linux system. */ +void cpuinfo_riscv_linux_init(void) { + struct cpuinfo_riscv_linux_processor* riscv_linux_processors = NULL; + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_package* packages = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; + const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + uint32_t* linux_cpu_to_uarch_index_map = NULL; + + /** + * The interesting set of processors are the number of 'present' + * processors on the system. There may be more 'possible' processors, but + * processor information cannot be gathered on non-present processors. + * + * Note: For SoCs, it is largely the case that all processors are known + * at boot and no processors are hotplugged at runtime, so the + * 'present' and 'possible' list is often the same. + * + * Note: This computes the maximum processor ID of the 'present' + * processors. It is not a count of the number of processors on the + * system. 
+ */ + const uint32_t max_processor_id = 1 + + cpuinfo_linux_get_max_present_processor( + cpuinfo_linux_get_max_processors_count()); + if (max_processor_id == 0) { + cpuinfo_log_error("failed to discover any processors"); + return; + } + + /** + * Allocate space to store all processor information. This array is + * sized to the max processor ID as opposed to the number of 'present' + * processors, to leverage pointer math in the common utility functions. + */ + riscv_linux_processors = calloc(max_processor_id, + sizeof(struct cpuinfo_riscv_linux_processor)); + if (riscv_linux_processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" processors.", + max_processor_id * sizeof(struct cpuinfo_riscv_linux_processor), + max_processor_id); + goto cleanup; + } + + /** + * Attempt to detect all processors and apply the corresponding flag to + * each processor struct that we find. + */ + if (!cpuinfo_linux_detect_present_processors(max_processor_id, + &riscv_linux_processors->flags, + sizeof(struct cpuinfo_riscv_linux_processor), + CPUINFO_LINUX_FLAG_PRESENT | CPUINFO_LINUX_FLAG_VALID)) { + cpuinfo_log_error("failed to detect present processors"); + goto cleanup; + } + + /* Populate processor information. */ + for (size_t processor = 0; processor < max_processor_id; processor++) { + if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + /* TODO: Determine if an 'smt_id' is available. */ + riscv_linux_processors[processor].processor.linux_id = processor; + } + + /* Populate core information. */ + for (size_t processor = 0; processor < max_processor_id; processor++) { + if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + + /* Populate processor start and count information. 
*/ + if (!cpuinfo_linux_detect_core_cpus( + max_processor_id, + processor, + (cpuinfo_siblings_callback) core_cpus_parser, + riscv_linux_processors)) { + cpuinfo_log_error("failed to detect core cpus for processor %zu.", processor); + goto cleanup; + } + + /* Populate core ID information. */ + if (cpuinfo_linux_get_processor_core_id( + processor, + &riscv_linux_processors[processor].core.core_id)) { + riscv_linux_processors[processor].flags |= CPUINFO_LINUX_FLAG_CORE_ID; + } + + /** + * Populate the vendor and uarch of this core from this + * processor. When the final 'cores' list is constructed, only + * the values from the core leader will be honored. + */ + cpuinfo_riscv_linux_decode_vendor_uarch_from_hwprobe( + processor, + &riscv_linux_processors[processor].core.vendor, + &riscv_linux_processors[processor].core.uarch); + + /* Populate frequency information of this core. */ + uint32_t frequency = cpuinfo_linux_get_processor_cur_frequency(processor); + if (frequency != 0) { + riscv_linux_processors[processor].core.frequency = frequency; + riscv_linux_processors[processor].flags |= CPUINFO_LINUX_FLAG_CUR_FREQUENCY; + } + } + + /* Populate cluster information. */ + for (size_t processor = 0; processor < max_processor_id; processor++) { + if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + if (!cpuinfo_linux_detect_cluster_cpus( + max_processor_id, + processor, + (cpuinfo_siblings_callback) cluster_cpus_parser, + riscv_linux_processors)) { + cpuinfo_log_warning("failed to detect cluster cpus for processor %zu.", processor); + goto cleanup; + } + + /** + * Populate the vendor, uarch and frequency of this cluster from + * this logical processor. When the 'clusters' list is constructed, + * only the values from the cluster leader will be honored. 
+ */ + riscv_linux_processors[processor].cluster.vendor = + riscv_linux_processors[processor].core.vendor; + riscv_linux_processors[processor].cluster.uarch = + riscv_linux_processors[processor].core.uarch; + riscv_linux_processors[processor].cluster.frequency = + riscv_linux_processors[processor].core.frequency; + } + + /* Populate package information. */ + for (size_t processor = 0; processor < max_processor_id; processor++) { + if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + if (!cpuinfo_linux_detect_package_cpus( + max_processor_id, + processor, + (cpuinfo_siblings_callback) package_cpus_parser, + riscv_linux_processors)) { + cpuinfo_log_warning("failed to detect package cpus for processor %zu.", processor); + goto cleanup; + } + } + + /* Populate ISA structure with hwcap information. */ + cpuinfo_riscv_linux_decode_isa_from_hwcap(&cpuinfo_isa); + + /** + * To efficiently compute the number of unique micro-architectures + * present on the system, sort the processor list by micro-architecture + * and then scan through the list to count the differences. + * + * Ensure this is done at the end of composing the processor list - the + * parsing functions assume that the position of the processor in the + * list matches it's Linux ID, which this sorting operation breaks. + */ + qsort(riscv_linux_processors, + max_processor_id, + sizeof(struct cpuinfo_riscv_linux_processor), + compare_riscv_linux_processors); + + /** + * Determine the number of *valid* detected processors, cores, + * clusters, packages and uarchs in the list. 
+ */ + size_t valid_processors_count = 0; + size_t valid_cores_count = 0; + size_t valid_clusters_count = 0; + size_t valid_packages_count = 0; + size_t valid_uarchs_count = 0; + enum cpuinfo_uarch last_uarch = cpuinfo_uarch_unknown; + for (size_t processor = 0; processor < max_processor_id; processor++) { + if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + + /** + * All comparisons to the leader id values MUST be done against + * the 'linux_id' as opposed to 'processor'. The sort function + * above no longer allows us to make the assumption that these + * two values are the same. + */ + uint32_t linux_id = riscv_linux_processors[processor].processor.linux_id; + + valid_processors_count++; + if (riscv_linux_processors[processor].core_leader_id == linux_id) { + valid_cores_count++; + } + if (riscv_linux_processors[processor].cluster_leader_id == linux_id) { + valid_clusters_count++; + } + if (riscv_linux_processors[processor].package_leader_id == linux_id) { + valid_packages_count++; + } + /** + * As we've sorted by micro-architecture, when the uarch differs + * between two entries, a unique uarch has been observed. + */ + if (last_uarch != riscv_linux_processors[processor].core.uarch + || valid_uarchs_count == 0) { + valid_uarchs_count++; + last_uarch = riscv_linux_processors[processor].core.uarch; + } + } + + /* Allocate and populate final public ABI structures. 
*/ + processors = calloc(valid_processors_count, + sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %zu processors.", + valid_processors_count * sizeof(struct cpuinfo_processor), + valid_processors_count); + goto cleanup; + } + + cores = calloc(valid_cores_count, + sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %zu cores.", + valid_cores_count * sizeof(struct cpuinfo_core), + valid_cores_count); + goto cleanup; + } + + clusters = calloc(valid_clusters_count, + sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %zu clusters.", + valid_clusters_count * sizeof(struct cpuinfo_cluster), + valid_clusters_count); + goto cleanup; + } + + packages = calloc(valid_packages_count, + sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %zu packages.", + valid_packages_count * sizeof(struct cpuinfo_package), + valid_packages_count); + goto cleanup; + } + + uarchs = calloc(valid_uarchs_count, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %zu packages.", + valid_uarchs_count * sizeof(struct cpuinfo_uarch_info), + valid_uarchs_count); + goto cleanup; + } + + linux_cpu_to_processor_map = calloc(max_processor_id, + sizeof(struct cpuinfo_processor*)); + if (linux_cpu_to_processor_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" processor map.", + max_processor_id * sizeof(struct cpuinfo_processor*), + max_processor_id); + goto cleanup; + } + + linux_cpu_to_core_map = calloc(max_processor_id, + sizeof(struct cpuinfo_core*)); + if (linux_cpu_to_core_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" core map.", + max_processor_id * sizeof(struct cpuinfo_core*), + max_processor_id); + goto cleanup; + } + + 
linux_cpu_to_uarch_index_map = calloc(max_processor_id, + sizeof(struct cpuinfo_uarch_info*)); + if (linux_cpu_to_uarch_index_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch map.", + max_processor_id * sizeof(struct cpuinfo_uarch_info*), + max_processor_id); + goto cleanup; + } + + /* Transfer contents of processor list to ABI structures. */ + size_t valid_processors_index = 0; + size_t valid_cores_index = 0; + size_t valid_clusters_index = 0; + size_t valid_packages_index = 0; + size_t valid_uarchs_index = 0; + last_uarch = cpuinfo_uarch_unknown; + for (size_t processor = 0; processor < max_processor_id; processor++) { + if (!bitmask_all(riscv_linux_processors[processor].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + + /** + * All comparisons to the leader id values MUST be done against + * the 'linux_id' as opposed to 'processor'. The sort function + * above no longer allows us to make the assumption that these + * two values are the same. + */ + uint32_t linux_id = riscv_linux_processors[processor].processor.linux_id; + + /* Create uarch entry if this uarch has not been seen before. */ + if (last_uarch != riscv_linux_processors[processor].core.uarch + || valid_uarchs_index == 0) { + uarchs[valid_uarchs_index++].uarch = + riscv_linux_processors[processor].core.uarch; + last_uarch = riscv_linux_processors[processor].core.uarch; + } + + /* Copy cpuinfo_processor information. */ + memcpy(&processors[valid_processors_index++], + &riscv_linux_processors[processor].processor, + sizeof(struct cpuinfo_processor)); + + /* Update uarch processor count. */ + uarchs[valid_uarchs_index - 1].processor_count++; + + /* Copy cpuinfo_core information, if this is the leader. */ + if (riscv_linux_processors[processor].core_leader_id == linux_id) { + memcpy(&cores[valid_cores_index++], + &riscv_linux_processors[processor].core, + sizeof(struct cpuinfo_core)); + /* Update uarch core count. 
*/ + uarchs[valid_uarchs_index - 1].core_count++; + } + + /* Copy cpuinfo_cluster information, if this is the leader. */ + if (riscv_linux_processors[processor].cluster_leader_id == linux_id) { + memcpy(&clusters[valid_clusters_index++], + &riscv_linux_processors[processor].cluster, + sizeof(struct cpuinfo_cluster)); + } + + /* Copy cpuinfo_package information, if this is the leader. */ + if (riscv_linux_processors[processor].package_leader_id == linux_id) { + memcpy(&packages[valid_packages_index++], + &riscv_linux_processors[processor].package, + sizeof(struct cpuinfo_package)); + } + + /* Commit pointers on the final structures. */ + processors[valid_processors_index - 1].core = &cores[valid_cores_index - 1]; + processors[valid_processors_index - 1].cluster = &clusters[valid_clusters_index - 1]; + processors[valid_processors_index - 1].package = &packages[valid_packages_index - 1]; + + cores[valid_cores_index - 1].cluster = &clusters[valid_clusters_index - 1]; + cores[valid_cores_index - 1].package = &packages[valid_packages_index - 1]; + + clusters[valid_clusters_index - 1].package = &packages[valid_packages_index - 1]; + + linux_cpu_to_processor_map[linux_id] = &processors[valid_processors_index - 1]; + linux_cpu_to_core_map[linux_id] = &cores[valid_cores_index - 1]; + linux_cpu_to_uarch_index_map[linux_id] = valid_uarchs_index - 1; + } + + /* Commit */ + cpuinfo_processors = processors; + cpuinfo_processors_count = valid_processors_count; + cpuinfo_cores = cores; + cpuinfo_cores_count = valid_cores_count; + cpuinfo_clusters = clusters; + cpuinfo_clusters_count = valid_clusters_count; + cpuinfo_packages = packages; + cpuinfo_packages_count = valid_packages_count; + cpuinfo_uarchs = uarchs; + cpuinfo_uarchs_count = valid_uarchs_count; + + cpuinfo_linux_cpu_max = max_processor_id; + cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; + cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; + cpuinfo_linux_cpu_to_uarch_index_map = 
linux_cpu_to_uarch_index_map; + + __sync_synchronize(); + + cpuinfo_is_initialized = true; + + /* Mark all public structures NULL to prevent cleanup from erasing them. */ + processors = NULL; + cores = NULL; + clusters = NULL; + packages = NULL; + uarchs = NULL; + linux_cpu_to_processor_map = NULL; + linux_cpu_to_core_map = NULL; + linux_cpu_to_uarch_index_map = NULL; +cleanup: + free(riscv_linux_processors); + free(processors); + free(cores); + free(clusters); + free(packages); + free(uarchs); + free(linux_cpu_to_processor_map); + free(linux_cpu_to_core_map); + free(linux_cpu_to_uarch_index_map); +}
diff --git a/3rdparty/cpuinfo/src/riscv/linux/riscv-hw.c b/3rdparty/cpuinfo/src/riscv/linux/riscv-hw.c
new file mode 100644
index 0000000000000..ccee848cf48e5
--- /dev/null
+++ b/3rdparty/cpuinfo/src/riscv/linux/riscv-hw.c
@@ -0,0 +1,95 @@
+#include
+#include
+
+#include
+#include
+#include
+
+/**
+ * Query the riscv_hwprobe interface for the vendor, architecture and
+ * implementation IDs of one logical processor, then decode them into
+ * cpuinfo's vendor and micro-architecture enums.
+ *
+ * Both outputs are set to their "unknown" value up front and keep it when
+ * the CPU set cannot be allocated or the hwprobe call reports a failure.
+ *
+ * @param processor index of the logical processor to probe.
+ * @param[out] vendor receives the decoded vendor.
+ * @param[out] uarch receives the decoded micro-architecture.
+ */
+void cpuinfo_riscv_linux_decode_vendor_uarch_from_hwprobe(
+	uint32_t processor,
+	enum cpuinfo_vendor vendor[restrict static 1],
+	enum cpuinfo_uarch uarch[restrict static 1]) {
+	struct riscv_hwprobe pairs[] = {
+		{ .key = RISCV_HWPROBE_KEY_MVENDORID, },
+		{ .key = RISCV_HWPROBE_KEY_MARCHID, },
+		{ .key = RISCV_HWPROBE_KEY_MIMPID, },
+	};
+	const size_t pairs_count = sizeof(pairs) / sizeof(struct riscv_hwprobe);
+
+	/* In case of failure, report unknown. */
+	*vendor = cpuinfo_vendor_unknown;
+	*uarch = cpuinfo_uarch_unknown;
+
+	/**
+	 * Create a CPU set with only this processor flagged. CPU_ALLOC() is
+	 * not documented to zero the set it returns, and a dynamically
+	 * allocated set has to be accessed through the *_S macros with its
+	 * size in bytes.
+	 */
+	const size_t cpu_count = (size_t) processor + 1;
+	cpu_set_t* cpu_set = CPU_ALLOC(cpu_count);
+	if (cpu_set == NULL) {
+		cpuinfo_log_warning("failed to allocate a cpu set for %zu processors", cpu_count);
+		return;
+	}
+	const size_t cpu_set_size = CPU_ALLOC_SIZE(cpu_count);
+	CPU_ZERO_S(cpu_set_size, cpu_set);
+	CPU_SET_S(processor, cpu_set_size, cpu_set);
+
+	/**
+	 * Request all available information from hwprobe. The set size is
+	 * given in bytes so the kernel reads exactly the allocated buffer.
+	 */
+	const int ret = __riscv_hwprobe(pairs, pairs_count,
+			cpu_set_size, (unsigned long*)cpu_set,
+			0 /* flags */);
+
+	/* The set is only needed for the call above; free it on every path. */
+	CPU_FREE(cpu_set);
+
+	/**
+	 * Success is reported as 0. Treat any other value as a failure so that
+	 * both negative and positive error-code conventions are caught.
+	 */
+	if (ret != 0) {
+		cpuinfo_log_warning("failed to get hwprobe information, err: %d", ret);
+		return;
+	}
+
+	/**
+	 * The syscall may not have populated all requested keys, loop through
+	 * the list and store the values that were discovered.
+	 */
+	uint32_t vendor_id = 0;
+	uint32_t arch_id = 0;
+	uint32_t imp_id = 0;
+	for (size_t pair = 0; pair < pairs_count; pair++) {
+		switch (pairs[pair].key) {
+			case RISCV_HWPROBE_KEY_MVENDORID:
+				vendor_id = pairs[pair].value;
+				break;
+			case RISCV_HWPROBE_KEY_MARCHID:
+				arch_id = pairs[pair].value;
+				break;
+			case RISCV_HWPROBE_KEY_MIMPID:
+				imp_id = pairs[pair].value;
+				break;
+			default:
+				/* The key value may be -1 if unsupported. */
+				break;
+		}
+	}
+	cpuinfo_riscv_decode_vendor_uarch(vendor_id, arch_id, imp_id,
+			vendor, uarch);
+}
diff --git a/3rdparty/cpuinfo/src/riscv/linux/riscv-isa.c b/3rdparty/cpuinfo/src/riscv/linux/riscv-isa.c new file mode 100644 index 0000000000000..ace451b8582bf --- /dev/null +++ b/3rdparty/cpuinfo/src/riscv/linux/riscv-isa.c @@ -0,0 +1,44 @@ +#include +#include + +#include + +/** + * arch/riscv/include/uapi/asm/hwcap.h + * + * This must be kept in sync with the upstream kernel header. 
+ */
+#define COMPAT_HWCAP_ISA_I (1 << ('I' - 'A'))
+#define COMPAT_HWCAP_ISA_M (1 << ('M' - 'A'))
+#define COMPAT_HWCAP_ISA_A (1 << ('A' - 'A'))
+#define COMPAT_HWCAP_ISA_F (1 << ('F' - 'A'))
+#define COMPAT_HWCAP_ISA_D (1 << ('D' - 'A'))
+#define COMPAT_HWCAP_ISA_C (1 << ('C' - 'A'))
+#define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))
+
+/**
+ * Raise the i/m/a/f/d/c/v flags of 'isa' for every matching extension bit in
+ * getauxval(AT_HWCAP). Flags whose bit is absent are left untouched.
+ */
+void cpuinfo_riscv_linux_decode_isa_from_hwcap(
+	struct cpuinfo_riscv_isa isa[restrict static 1]) {
+	const unsigned long reported = getauxval(AT_HWCAP);
+
+/* Set isa->field when 'mask' is present in the reported capabilities. */
+#define RAISE_IF_REPORTED(field, mask) \
+	do { \
+		if ((reported & (mask)) != 0) { \
+			isa->field = true; \
+		} \
+	} while (0)
+
+	RAISE_IF_REPORTED(i, COMPAT_HWCAP_ISA_I);
+	RAISE_IF_REPORTED(m, COMPAT_HWCAP_ISA_M);
+	RAISE_IF_REPORTED(a, COMPAT_HWCAP_ISA_A);
+	RAISE_IF_REPORTED(f, COMPAT_HWCAP_ISA_F);
+	RAISE_IF_REPORTED(d, COMPAT_HWCAP_ISA_D);
+	RAISE_IF_REPORTED(c, COMPAT_HWCAP_ISA_C);
+	RAISE_IF_REPORTED(v, COMPAT_HWCAP_ISA_V);
+
+#undef RAISE_IF_REPORTED
+}
diff --git a/3rdparty/cpuinfo/src/riscv/uarch.c b/3rdparty/cpuinfo/src/riscv/uarch.c
new file mode 100644
index 0000000000000..bf93e867d7553
--- /dev/null
+++ b/3rdparty/cpuinfo/src/riscv/uarch.c
@@ -0,0 +1,31 @@
+#include
+
+#include
+#include
+
+/**
+ * Map raw RISC-V vendor/architecture/implementation IDs onto cpuinfo enums.
+ *
+ * Only 'vendor_id' is consulted: 'arch_id' and 'imp_id' are accepted but not
+ * read, and '*uarch' is always reported as cpuinfo_uarch_unknown.
+ */
+void cpuinfo_riscv_decode_vendor_uarch(
+	uint32_t vendor_id,
+	uint32_t arch_id,
+	uint32_t imp_id,
+	enum cpuinfo_vendor vendor[restrict static 1],
+	enum cpuinfo_uarch uarch[restrict static 1]) {
+	/**
+	 * TODO: Add support for parsing chipset architecture and implementation
+	 * IDs here, when a chipset of interest comes along.
+	 */
+	*uarch = cpuinfo_uarch_unknown;
+
+	/* The vendor ID alone decides which cpuinfo_vendor is reported. */
+	if (vendor_id == cpuinfo_riscv_chipset_sifive) {
+		*vendor = cpuinfo_vendor_sifive;
+		return;
+	}
+	*vendor = cpuinfo_vendor_unknown;
+	cpuinfo_log_warning("unknown vendor ID: %"PRIu32, vendor_id);
+}
diff --git a/3rdparty/cpuinfo/src/x86/isa.c b/3rdparty/cpuinfo/src/x86/isa.c index f2e5a281bca2c..3f36cee148f9a 100644 --- a/3rdparty/cpuinfo/src/x86/isa.c +++ b/3rdparty/cpuinfo/src/x86/isa.c @@ -490,6 +490,18 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( */ isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100)); + /* + * AVX512_FP16 instructions: + * - Intel: edx[bit 23] in structured feature info (ecx = 0). + */ + isa.avx512fp16 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00800000)); + + /* + * AVX_VNNI instructions: + * - Intel: eax[bit 4] in structured feature info (ecx = 1). + */ + isa.avxvnni = avx_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000010)); + /* * AVX512_BF16 instructions: * - Intel: eax[bit 5] in structured feature info (ecx = 1). diff --git a/3rdparty/cpuinfo/src/x86/name.c b/3rdparty/cpuinfo/src/x86/name.c index a7cc7c68cb876..38c47a346530f 100644 --- a/3rdparty/cpuinfo/src/x86/name.c +++ b/3rdparty/cpuinfo/src/x86/name.c @@ -234,7 +234,7 @@ static bool transform_token(char* token_start, char* token_end, struct parser_st return true; } /* - * Erase everywhing after "SOC" on AMD System-on-Chips, e.g. + * Erase everything after "SOC" on AMD System-on-Chips, e.g. 
* "AMD GX-212JC SOC with Radeon(TM) R2E Graphics \0" */ if (erase_matching(token_start, token_length, "SOC")) { @@ -581,7 +581,7 @@ uint32_t cpuinfo_x86_normalize_brand_string( /* Iterate through all tokens and erase redundant parts */ { bool is_token = false; - char* token_start; + char* token_start = NULL; for (char* char_ptr = name; char_ptr != name_end; char_ptr++) { if (*char_ptr == ' ') { if (is_token) { @@ -619,7 +619,7 @@ uint32_t cpuinfo_x86_normalize_brand_string( /* Compact tokens: collapse multiple spacing into one */ { char* output_ptr = normalized_name; - char* token_start; + char* token_start = NULL; bool is_token = false; bool previous_token_ends_with_dash = true; bool current_token_starts_with_dash = false; diff --git a/3rdparty/cpuinfo/src/x86/uarch.c b/3rdparty/cpuinfo/src/x86/uarch.c index 3705499422d43..a38d7b0555a41 100644 --- a/3rdparty/cpuinfo/src/x86/uarch.c +++ b/3rdparty/cpuinfo/src/x86/uarch.c @@ -195,35 +195,37 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( } break; case 0x16: - if (model_info->model >= 0x03) { + if (model_info->extended_model >= 0x03) { return cpuinfo_uarch_puma; } else { return cpuinfo_uarch_jaguar; } case 0x17: - switch (model_info->model) { - case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl - case 0x08: // 12 nm Pinnacle Ridge - case 0x11: // 14 nm Raven Ridge, Great Horned Owl - case 0x18: // 12 nm Picasso + switch (model_info->extended_model) { + case 0x0: // model 01h -> 14 nm Naples/Whitehaven/Summit Ridge/Snowy Owl, model 08h -> 12 nm Colfax/Pinnacle Ridge + case 0x1: // model 11h -> 14 nm Raven Ridge/Great Horned Owl, model 18h -> 14 nm Banded Kestrel / 12 nm Picasso return cpuinfo_uarch_zen; - case 0x31: // Rome, Castle Peak - case 0x60: // Renoir - case 0x68: // Lucienne - case 0x71: // Matisse - case 0x90: // Van Gogh - case 0x98: // Mero + case 0x3: // model 31h -> Rome/Castle Peak + case 0x4: // model 47h -> Xbox Series X + case 0x6: // model 60h -> Renoir/Grey Hawk, model 68h -> Lucienne 
+ case 0x7: // model 71h -> Matisse + case 0x9: // model 90h -> Van Gogh, model 98h -> Mero return cpuinfo_uarch_zen2; } break; case 0x19: - switch (model_info->model) { - case 0x01: // Genesis - case 0x21: // Vermeer - case 0x30: // Badami, Trento - case 0x40: // Rembrandt - case 0x50: // Cezanne + switch (model_info->extended_model) { + case 0x0: // model 00h -> Genesis, model 01h -> Milan, model 08h -> Chagall + case 0x2: // model 21h -> Vermeer + case 0x3: // model 30h -> Badami, Trento + case 0x4: // model 40h -> Rembrandt + case 0x5: // model 50h -> Cezanne return cpuinfo_uarch_zen3; + case 0x1: // model 10h..1Fh -> Stones + case 0x6: // model 60h..6Fh -> Raphael + case 0x7: // model 70h..77h -> Phoenix/Hawkpoint1, model 78h..7Fh -> Phoenix 2/Hawkpoint2 + case 0xA: // model A0h..AFh -> Stones-Dense + return cpuinfo_uarch_zen4; } break; }