Skip to content

Commit

Permalink
#8579: change TTNN_TENSOR_PRINT_PROFILE from inline to extern
Browse files Browse the repository at this point in the history
  • Loading branch information
arakhmati committed May 22, 2024
1 parent b2d88e4 commit 2896971
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 159 deletions.
2 changes: 2 additions & 0 deletions tt_eager/tensor/tensor_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace tt_metal {

namespace tensor_impl {

// Out-of-line definition of the tensor print-profile flag; this commit changes
// the declaration in tensor_impl.hpp from `inline` to `extern`, making this the
// single definition. Defaults to Short (abbreviated tensor printing).
TensorPrintProfile TTNN_TENSOR_PRINT_PROFILE = TensorPrintProfile::Short;

std::ostream& operator<<(std::ostream& os, const DataType& dtype) {
switch (dtype) {
case DataType::BFLOAT8_B: os << "bfloat8_b"; break;
Expand Down
2 changes: 1 addition & 1 deletion tt_eager/tensor/tensor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ enum class TensorPrintProfile {
Full,
};

inline TensorPrintProfile TTNN_TENSOR_PRINT_PROFILE = TensorPrintProfile::Short;
extern TensorPrintProfile TTNN_TENSOR_PRINT_PROFILE;

namespace detail {

Expand Down
15 changes: 5 additions & 10 deletions tt_eager/tt_dnn/op_library/operation_history.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,13 @@ namespace tt {

namespace tt_metal {


#ifdef DEBUG

namespace operation_history {

namespace detail {

// Persist any recorded operations via dump_to_csv() when the history object
// is destroyed (the global OPERATION_HISTORY instance dies at shutdown).
OperationHistory::~OperationHistory() { this->dump_to_csv(); }

void OperationHistory::append(OperationRecord&& record) {
std::scoped_lock<std::mutex> lock(op_history_mutex);
Expand Down Expand Up @@ -132,15 +129,13 @@ void OperationHistory::clear() {
this->records.clear();
}

// Global operation-history singleton; defined here and declared `extern` in
// operation_history.hpp (changed from `inline` by this commit).
OperationHistory OPERATION_HISTORY{};

} // namespace detail

// Returns the operation-history CSV output path from the
// OPERATION_HISTORY_CSV environment variable, or nullptr when it is unset.
const char* csv_file_name() { return std::getenv("OPERATION_HISTORY_CSV"); }

// Operation-history recording is enabled exactly when a CSV output path has
// been provided via the environment.
bool enabled() { return csv_file_name() != nullptr; }

void dump_to_csv() { detail::OPERATION_HISTORY.dump_to_csv(); }
void clear() { detail::OPERATION_HISTORY.clear(); }
Expand Down
16 changes: 11 additions & 5 deletions tt_eager/tt_dnn/op_library/operation_history.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include <tt_eager/tensor/tensor.hpp>

#include "tt_dnn/op_library/operation.hpp"

namespace tt {
Expand All @@ -22,10 +23,15 @@ struct TensorRecord {
const Layout layout;
const std::optional<MemoryConfig> memory_config;

static constexpr auto attribute_names = std::make_tuple("storage_type", "shape", "data_type", "layout", "memory_config");
static constexpr auto attribute_names =
std::make_tuple("storage_type", "shape", "data_type", "layout", "memory_config");
const auto attribute_values() const {
return std::make_tuple(
std::cref(this->storage_type), std::cref(this->shape), std::cref(this->data_type), std::cref(this->layout), std::cref(this->memory_config));
std::cref(this->storage_type),
std::cref(this->shape),
std::cref(this->data_type),
std::cref(this->layout),
std::cref(this->memory_config));
}
};

Expand Down Expand Up @@ -54,12 +60,12 @@ struct OperationHistory {
std::vector<OperationRecord> records;
};

inline OperationHistory OPERATION_HISTORY{};
extern OperationHistory OPERATION_HISTORY;

} // namespace detail

template<typename ... Args>
inline void append(Args&& ... args) {
template <typename... Args>
inline void append(Args&&... args) {
detail::OPERATION_HISTORY.append(std::forward<Args>(args)...);
}

Expand Down
98 changes: 58 additions & 40 deletions tt_metal/impl/buffers/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

#include "tt_metal/impl/buffers/buffer.hpp"

#include "tt_metal/common/assert.hpp"
#include "llrt/llrt.hpp"
#include "tt_metal/common/assert.hpp"
#include "tt_metal/common/math.hpp"
#include "tt_metal/detail/tt_metal.hpp"
#include "tt_metal/hostdevcommon/common_values.hpp"
Expand All @@ -17,28 +17,36 @@ namespace tt {

namespace tt_metal {

bool is_sharded(const TensorMemoryLayout & layout){
bool is_sharded(const TensorMemoryLayout &layout) {
return (
layout == TensorMemoryLayout::HEIGHT_SHARDED ||
layout == TensorMemoryLayout::WIDTH_SHARDED ||
layout == TensorMemoryLayout::BLOCK_SHARDED );
layout == TensorMemoryLayout::HEIGHT_SHARDED || layout == TensorMemoryLayout::WIDTH_SHARDED ||
layout == TensorMemoryLayout::BLOCK_SHARDED);
}


void validate_buffer_size_and_page_size(uint64_t size, uint64_t page_size, const BufferType &buffer_type, const TensorMemoryLayout &buffer_layout, std::optional<ShardSpecBuffer> shard_parameters) {
void validate_buffer_size_and_page_size(
uint64_t size,
uint64_t page_size,
const BufferType &buffer_type,
const TensorMemoryLayout &buffer_layout,
std::optional<ShardSpecBuffer> shard_parameters) {
TT_FATAL(size != 0 and page_size != 0, "Buffer size and page size should be larger than 0 bytes!");
bool valid_page_size = (size % page_size == 0);
TT_FATAL(valid_page_size, "For valid non-interleaved buffers page size {} must equal buffer size {}. For interleaved-buffers page size should be divisible by buffer size", page_size, size);
TT_FATAL(page_size % sizeof(uint32_t) == 0, "Page size must be divisible by sizeof(uint32_t) because buffers hold uint32_t values");
if(buffer_layout == TensorMemoryLayout::SINGLE_BANK){
TT_ASSERT(page_size == size , "Continguous buffer must be one contiguous page");
}
else if(is_sharded(buffer_layout)){
TT_ASSERT(shard_parameters != std::nullopt , "Sharded buffers must have a core grid assigned");
TT_FATAL(
valid_page_size,
"For valid non-interleaved buffers page size {} must equal buffer size {}. For interleaved-buffers page size "
"should be divisible by buffer size",
page_size,
size);
TT_FATAL(
page_size % sizeof(uint32_t) == 0,
"Page size must be divisible by sizeof(uint32_t) because buffers hold uint32_t values");
if (buffer_layout == TensorMemoryLayout::SINGLE_BANK) {
TT_ASSERT(page_size == size, "Continguous buffer must be one contiguous page");
} else if (is_sharded(buffer_layout)) {
TT_ASSERT(shard_parameters != std::nullopt, "Sharded buffers must have a core grid assigned");
}
}


inline std::tuple<std::vector<std::vector<uint32_t>>, std::vector<std::array<uint32_t, 2>>> core_to_host_pages(
const uint32_t &total_pages,
const uint32_t &pages_per_shard,
Expand Down Expand Up @@ -105,21 +113,27 @@ inline std::tuple<std::vector<std::vector<uint32_t>>, std::vector<std::array<uin
return {ret_vec, ret_shard_shape};
}


// Construct a device buffer and optionally allocate its backing storage.
//
//   device           owning device; must be non-null with a live allocator
//   size             total buffer size in bytes
//   page_size        page size in bytes (validated against size and layout)
//   buffer_type      storage class of the buffer (BufferType)
//   buffer_layout    interleaved / single-bank / sharded memory layout
//   shard_parameters shard spec; required when buffer_layout is sharded
//   allocate         when true, reserve device address space immediately
Buffer::Buffer(
    Device *device,
    uint64_t size,
    uint64_t page_size,
    const BufferType buffer_type,
    const TensorMemoryLayout buffer_layout,
    std::optional<ShardSpecBuffer> shard_parameters,
    bool allocate) :
    device_(device),
    size_(size),
    page_size_(page_size),
    buffer_type_(buffer_type),
    buffer_layout_(buffer_layout),
    shard_parameters_(shard_parameters) {
    // Fail fast on a null device/allocator before any validation or allocation.
    TT_FATAL(this->device_ != nullptr and this->device_->allocator_ != nullptr);
    validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
    if (allocate) {
        this->allocate();
    }
}



BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer) {
BufferPageMapping buffer_page_mapping;
bool row_major = buffer.shard_spec().orientation() == ShardOrientation::ROW_MAJOR;
Expand All @@ -128,7 +142,7 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer) {
buffer_page_mapping.all_cores_ = corerange_to_cores(buffer.shard_spec().grid(), num_cores, row_major);
TT_ASSERT(num_cores == buffer_page_mapping.all_cores_.size());
uint32_t core_id = 0;
for (const auto& core : buffer_page_mapping.all_cores_) {
for (const auto &core : buffer_page_mapping.all_cores_) {
buffer_page_mapping.core_to_core_id_.insert({core, core_id});
core_id++;
}
Expand Down Expand Up @@ -180,7 +194,6 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer) {
return buffer_page_mapping;
}


Buffer::Buffer(const Buffer &other) :
device_(other.device_),
size_(other.size_),
Expand All @@ -204,9 +217,16 @@ Buffer &Buffer::operator=(const Buffer &other) {
return *this;
}

// Move constructor: takes over the source buffer's identity (device, size,
// address, page size, type, layout, shard parameters).
Buffer::Buffer(Buffer &&other) :
    device_(other.device_),
    size_(other.size_),
    address_(other.address_),
    page_size_(other.page_size_),
    buffer_type_(other.buffer_type_),
    buffer_layout_(other.buffer_layout_),
    shard_parameters_(other.shard_parameters_) {
    // Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is
    // transferred to `this`
    other.device_ = nullptr;
}

Expand All @@ -219,7 +239,8 @@ Buffer &Buffer::operator=(Buffer &&other) {
this->buffer_type_ = other.buffer_type_;
this->buffer_layout_ = other.buffer_layout_;
this->shard_parameters_ = other.shard_parameters_;
// Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is transferred to `this`
// Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is
// transferred to `this`
other.device_ = nullptr;
}
return *this;
Expand Down Expand Up @@ -257,15 +278,12 @@ CoreCoord Buffer::noc_coordinates(uint32_t bank_id) const {
case BufferType::SYSTEM_MEMORY: {
TT_THROW("Host buffer is located in system memory! Cannot retrieve NoC coordinates for it");
} break;
default:
TT_ASSERT(false && "Unsupported buffer type!");
default: TT_ASSERT(false && "Unsupported buffer type!");
}
return CoreCoord{0, 0};
}

// Convenience overload: NoC coordinates of this buffer in bank 0.
CoreCoord Buffer::noc_coordinates() const { return this->noc_coordinates(0); }

uint64_t Buffer::page_address(uint32_t bank_id, uint32_t page_index) const {
auto num_banks = this->device_->num_banks(this->buffer_type_);
Expand Down Expand Up @@ -301,9 +319,7 @@ void Buffer::deallocate() {
detail::DeallocateBuffer(this);
}

Buffer::~Buffer() {
this->deallocate();
}
Buffer::~Buffer() { this->deallocate(); }

tt::stl::reflection::Attributes ShardSpec::attributes() const {
return {
Expand All @@ -314,7 +330,7 @@ tt::stl::reflection::Attributes ShardSpec::attributes() const {
};
}

bool operator==(const ShardSpec& spec_a, const ShardSpec& spec_b) {
bool operator==(const ShardSpec &spec_a, const ShardSpec &spec_b) {
if (spec_a.grid != spec_b.grid) {
return false;
}
Expand All @@ -330,8 +346,10 @@ bool operator==(const ShardSpec& spec_a, const ShardSpec& spec_b) {
return true;
}

// Inequality is defined as the negation of operator==.
bool operator!=(const ShardSpec &spec_a, const ShardSpec &spec_b) { return not(spec_a == spec_b); }

namespace detail {
// Definition of the global buffer map.
// NOTE(review): presumably declared `extern` in a header, matching this
// commit's inline->extern cleanup — confirm against buffer.hpp.
buffer_map_t BUFFER_MAP = {};
}  // namespace detail

} // namespace tt_metal
Expand Down
Loading

0 comments on commit 2896971

Please sign in to comment.