diff --git a/CMakeLists.txt b/CMakeLists.txt
index fd03df49..cc15c4f3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -78,6 +78,23 @@ include(${PROJECT_SOURCE_DIR}/cmake/dependencies.cmake)

 add_library(umd_common_directories INTERFACE)
 target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/device ${PROJECT_SOURCE_DIR}/third_party/fmt/include)
+# Select the per-architecture directory that provides driver_atomics.h.
+# On Linux CMAKE_SYSTEM_PROCESSOR normally mirrors `uname -m`, so 32-bit x86
+# hosts usually report i686 (not i386) and have to be matched as well.
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64|i[3-6]86")
+    message(STATUS "Building for host arch - x86")
+    target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR}/device/arch/x86)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64")
+    message(STATUS "Building UMD for host arch - ARM")
+    target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR}/device/arch/aarch64)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv")
+    message(STATUS "Building UMD for host arch - riscv")
+    target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR}/device/arch/riscv)
+else()
+    # Fail at configure time rather than with a missing-header error at compile time.
+    message(FATAL_ERROR "Unsupported host architecture: ${CMAKE_SYSTEM_PROCESSOR}")
+endif()
+
 if(NOT DEFINED ENV{ARCH_NAME})
     message(FATAL_ERROR "Please set ARCH_NAME to grayskull, wormhole_b0, or blackhole")
 elseif($ENV{ARCH_NAME} STREQUAL "grayskull")
diff --git a/device/arch/aarch64/driver_atomics.h b/device/arch/aarch64/driver_atomics.h
new file mode 100644
index 00000000..874bf8f6
--- /dev/null
+++ b/device/arch/aarch64/driver_atomics.h
@@ -0,0 +1,30 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+// Memory-barrier helpers for ARM hosts, built on the DMB instruction.
+namespace tt_driver_atomics {
+
+static inline __attribute__((always_inline)) void sfence() {
+    // Full memory barrier (full system). ARM does not have a Store-Any barrier.
+    // https://developer.arm.com/documentation/100941/0101/Barriers
+    asm volatile ("DMB SY" : : : "memory");
+}
+
+static inline __attribute__((always_inline)) void lfence() {
+    // Load-Any barrier (full system)
+    // https://developer.arm.com/documentation/100941/0101/Barriers
+    asm volatile ("DMB LD" : : : "memory");
+}
+
+static inline __attribute__((always_inline)) void mfence() {
+    // Full memory barrier (full system).
+    // https://developer.arm.com/documentation/100941/0101/Barriers
+    asm volatile ("DMB SY" : : : "memory");
+}
+
+} // namespace tt_driver_atomics
diff --git a/device/arch/riscv/driver_atomics.h b/device/arch/riscv/driver_atomics.h
new file mode 100644
index 00000000..82df936f
--- /dev/null
+++ b/device/arch/riscv/driver_atomics.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+// Memory-barrier helpers for RISC-V hosts, built on the FENCE instruction.
+namespace tt_driver_atomics {
+
+// Orders earlier device-output and memory writes before later ones.
+static inline __attribute__((always_inline)) void sfence() {
+    asm volatile ("fence ow, ow" : : : "memory");
+}
+
+// Orders earlier device-input and memory reads before later ones.
+static inline __attribute__((always_inline)) void lfence() {
+    asm volatile ("fence ir, ir" : : : "memory");
+}
+
+// Orders all earlier memory and device I/O accesses before all later ones.
+static inline __attribute__((always_inline)) void mfence() {
+    asm volatile ("fence iorw, iorw" : : : "memory");
+}
+
+} // namespace tt_driver_atomics
diff --git a/device/arch/x86/driver_atomics.h b/device/arch/x86/driver_atomics.h
new file mode 100644
index 00000000..5433d608
--- /dev/null
+++ b/device/arch/x86/driver_atomics.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <immintrin.h>
+
+// Memory-barrier helpers for x86 hosts, built on the compiler fence intrinsics.
+namespace tt_driver_atomics {
+
+// Store-Any barrier.
+static inline __attribute__((always_inline)) void sfence() {
+    _mm_sfence();
+}
+// Load-Any barrier.
+static inline __attribute__((always_inline)) void lfence() {
+    _mm_lfence();
+}
+// Any-Any barrier.
+static inline __attribute__((always_inline)) void mfence() {
+    _mm_mfence();
+}
+
+} // namespace tt_driver_atomics
diff --git a/device/device_api_metal.h b/device/device_api_metal.h
index 0fc7820c..76bae8e3 100644
--- a/device/device_api_metal.h
+++ b/device/device_api_metal.h
@@ -6,4 +6,4 @@

 #pragma once
 #include "device/tt_device.h"
-#include "device/driver_atomics.h"
+#include "driver_atomics.h"
diff --git a/device/driver_atomics.h b/device/driver_atomics.h
deleted file mode 100644
index cbf4f6c7..00000000
--- a/device/driver_atomics.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-
-#if defined(__x86_64__) || defined(__i386__)
-#include <immintrin.h>
-#endif
-
-namespace tt_driver_atomics {
-
-#if defined(__x86_64__) || defined(__i386__)
-// Store-Any barrier.
-static inline __attribute__((always_inline)) void sfence() {
-    _mm_sfence();
-}
-// Load-Any barrier.
-static inline __attribute__((always_inline)) void lfence() {
-    _mm_lfence();
-}
-// Any-Any barrier.
-static inline __attribute__((always_inline)) void mfence() {
-    _mm_mfence();
-}
-
-#elif defined(__ARM_ARCH)
-
-static inline __attribute__((always_inline)) void sfence() {
-    // Full memory barrier (full system). ARM does not have a Store-Any barrier.
-    // https://developer.arm.com/documentation/100941/0101/Barriers
-    asm volatile ("DMB SY" : : : "memory");
-}
-
-static inline __attribute__((always_inline)) void lfence() {
-    // Load-Any barrier (full system)
-    // https://developer.arm.com/documentation/100941/0101/Barriers
-    asm volatile ("DMB LD" : : : "memory");
-}
-
-static inline __attribute__((always_inline)) void mfence() {
-    // Full memory barrier (full system).
-    // https://developer.arm.com/documentation/100941/0101/Barriers
-    asm volatile ("DMB SY" : : : "memory");
-}
-
-#elif defined(__riscv)
-
-static inline __attribute__((always_inline)) void sfence() {
-    asm volatile ("fence ow, ow" : : : "memory");
-}
-
-static inline __attribute__((always_inline)) void lfence() {
-    asm volatile ("fence ir, ir" : : : "memory");
-}
-
-static inline __attribute__((always_inline)) void mfence() {
-    asm volatile ("fence iorw, iorw" : : : "memory");
-}
-
-#else
-#error "Unsupported architecture"
-#endif
-
-} // namespace tt_driver_atomics
\ No newline at end of file
diff --git a/device/simulation/deprecated/tt_versim_device.cpp b/device/simulation/deprecated/tt_versim_device.cpp
index e7ac7506..638ad4d0 100644
--- a/device/simulation/deprecated/tt_versim_device.cpp
+++ b/device/simulation/deprecated/tt_versim_device.cpp
@@ -5,7 +5,7 @@


 #include "tt_device.h"
-#include "device/driver_atomics.h"
+#include "driver_atomics.h"
 #include "common/logger.hpp"
 #include
 #include
diff --git a/device/simulation/tt_simulation_device.cpp b/device/simulation/tt_simulation_device.cpp
index 0cc795aa..906af4ab 100644
--- a/device/simulation/tt_simulation_device.cpp
+++ b/device/simulation/tt_simulation_device.cpp
@@ -11,7 +11,7 @@

 #include "common/logger.hpp"
 #include "common/assert.hpp"
-#include "device/driver_atomics.h"
+#include "driver_atomics.h"
 #include "device/tt_cluster_descriptor.h"

 #include "tt_simulation_device.h"
diff --git a/device/tt_silicon_driver.cpp b/device/tt_silicon_driver.cpp
index f406fc6f..04c2b1f3 100644
--- a/device/tt_silicon_driver.cpp
+++ b/device/tt_silicon_driver.cpp
@@ -58,7 +58,7 @@
 #include
 #include "device/cpuset_lib.hpp"
 #include "common/logger.hpp"
-#include "device/driver_atomics.h"
+#include "driver_atomics.h"

 #define WHT "\e[0;37m"
 #define BLK "\e[0;30m"