From 28969713c3642b62b63fe3b2417530357743c392 Mon Sep 17 00:00:00 2001
From: Akhmed Rakhmati
Date: Wed, 22 May 2024 20:23:21 +0000
Subject: [PATCH] #8579: change TTNN_TENSOR_PRINT_PROFILE from inline to extern

---
 tt_eager/tensor/tensor_impl.cpp             |   2 +
 tt_eager/tensor/tensor_impl.hpp             |   2 +-
 .../tt_dnn/op_library/operation_history.cpp |  15 +-
 .../tt_dnn/op_library/operation_history.hpp |  16 +-
 tt_metal/impl/buffers/buffer.cpp            |  98 +++++----
 tt_metal/impl/buffers/buffer.hpp            | 193 ++++++++----------
 6 files changed, 167 insertions(+), 159 deletions(-)

diff --git a/tt_eager/tensor/tensor_impl.cpp b/tt_eager/tensor/tensor_impl.cpp
index f4f87e03385..f2f31beaca7 100644
--- a/tt_eager/tensor/tensor_impl.cpp
+++ b/tt_eager/tensor/tensor_impl.cpp
@@ -11,6 +11,8 @@ namespace tt_metal {
 
 namespace tensor_impl {
 
+TensorPrintProfile TTNN_TENSOR_PRINT_PROFILE = TensorPrintProfile::Short;
+
 std::ostream& operator<<(std::ostream& os, const DataType& dtype) {
     switch (dtype) {
         case DataType::BFLOAT8_B: os << "bfloat8_b"; break;
diff --git a/tt_eager/tensor/tensor_impl.hpp b/tt_eager/tensor/tensor_impl.hpp
index 025ad977b3f..7c8f71b25f1 100644
--- a/tt_eager/tensor/tensor_impl.hpp
+++ b/tt_eager/tensor/tensor_impl.hpp
@@ -707,7 +707,7 @@ enum class TensorPrintProfile {
     Full,
 };
 
-inline TensorPrintProfile TTNN_TENSOR_PRINT_PROFILE = TensorPrintProfile::Short;
+extern TensorPrintProfile TTNN_TENSOR_PRINT_PROFILE;
 
 namespace detail {
 
diff --git a/tt_eager/tt_dnn/op_library/operation_history.cpp b/tt_eager/tt_dnn/op_library/operation_history.cpp
index f5b5086fa91..688425b36c7 100644
--- a/tt_eager/tt_dnn/op_library/operation_history.cpp
+++ b/tt_eager/tt_dnn/op_library/operation_history.cpp
@@ -8,16 +8,13 @@ namespace tt {
 
 namespace tt_metal {
 
-
 #ifdef DEBUG
 
 namespace operation_history {
 
 namespace detail {
 
-OperationHistory::~OperationHistory() {
-    this->dump_to_csv();
-}
+OperationHistory::~OperationHistory() { this->dump_to_csv(); }
 
 void OperationHistory::append(OperationRecord&& record) {
     std::scoped_lock lock(op_history_mutex);
@@ -132,15 +129,13 @@ void OperationHistory::clear() {
     this->records.clear();
 }
 
+OperationHistory OPERATION_HISTORY{};
+
 }  // namespace detail
 
-const char* csv_file_name() {
-    return std::getenv("OPERATION_HISTORY_CSV");
-}
+const char* csv_file_name() { return std::getenv("OPERATION_HISTORY_CSV"); }
 
-bool enabled() {
-    return csv_file_name() != nullptr;
-}
+bool enabled() { return csv_file_name() != nullptr; }
 
 void dump_to_csv() { detail::OPERATION_HISTORY.dump_to_csv(); }
 void clear() { detail::OPERATION_HISTORY.clear(); }
diff --git a/tt_eager/tt_dnn/op_library/operation_history.hpp b/tt_eager/tt_dnn/op_library/operation_history.hpp
index be338f507bf..fe97edab4a9 100644
--- a/tt_eager/tt_dnn/op_library/operation_history.hpp
+++ b/tt_eager/tt_dnn/op_library/operation_history.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include
+
 #include "tt_dnn/op_library/operation.hpp"
 
 namespace tt {
@@ -22,10 +23,15 @@ struct TensorRecord {
     const Layout layout;
     const std::optional<MemoryConfig> memory_config;
 
-    static constexpr auto attribute_names = std::make_tuple("storage_type", "shape", "data_type", "layout", "memory_config");
+    static constexpr auto attribute_names =
+        std::make_tuple("storage_type", "shape", "data_type", "layout", "memory_config");
     const auto attribute_values() const {
         return std::make_tuple(
-            std::cref(this->storage_type), std::cref(this->shape), std::cref(this->data_type), std::cref(this->layout), std::cref(this->memory_config));
+            std::cref(this->storage_type),
+            std::cref(this->shape),
+            std::cref(this->data_type),
+            std::cref(this->layout),
+            std::cref(this->memory_config));
     }
 };
 
@@ -54,12 +60,12 @@ struct OperationHistory {
     std::vector<OperationRecord> records;
 };
 
-inline OperationHistory OPERATION_HISTORY{};
+extern OperationHistory OPERATION_HISTORY;
 
 }  // namespace detail
 
-template<typename ... Args>
-inline void append(Args&& ... args) {
+template <typename... Args>
+inline void append(Args&&... args) {
     detail::OPERATION_HISTORY.append(std::forward<Args>(args)...);
 }
 
diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp
index 45c2a6312ad..87761774c7c 100644
--- a/tt_metal/impl/buffers/buffer.cpp
+++ b/tt_metal/impl/buffers/buffer.cpp
@@ -4,8 +4,8 @@
 
 #include "tt_metal/impl/buffers/buffer.hpp"
 
-#include "tt_metal/common/assert.hpp"
 #include "llrt/llrt.hpp"
+#include "tt_metal/common/assert.hpp"
 #include "tt_metal/common/math.hpp"
 #include "tt_metal/detail/tt_metal.hpp"
 #include "tt_metal/hostdevcommon/common_values.hpp"
@@ -17,28 +17,36 @@ namespace tt {
 
 namespace tt_metal {
 
-bool is_sharded(const TensorMemoryLayout & layout){
+bool is_sharded(const TensorMemoryLayout &layout) {
     return (
-        layout == TensorMemoryLayout::HEIGHT_SHARDED ||
-        layout == TensorMemoryLayout::WIDTH_SHARDED ||
-        layout == TensorMemoryLayout::BLOCK_SHARDED );
+        layout == TensorMemoryLayout::HEIGHT_SHARDED || layout == TensorMemoryLayout::WIDTH_SHARDED ||
+        layout == TensorMemoryLayout::BLOCK_SHARDED);
 }
 
-
-void validate_buffer_size_and_page_size(uint64_t size, uint64_t page_size, const BufferType &buffer_type, const TensorMemoryLayout &buffer_layout, std::optional<ShardSpecBuffer> shard_parameters) {
+void validate_buffer_size_and_page_size(
+    uint64_t size,
+    uint64_t page_size,
+    const BufferType &buffer_type,
+    const TensorMemoryLayout &buffer_layout,
+    std::optional<ShardSpecBuffer> shard_parameters) {
     TT_FATAL(size != 0 and page_size != 0, "Buffer size and page size should be larger than 0 bytes!");
     bool valid_page_size = (size % page_size == 0);
-    TT_FATAL(valid_page_size, "For valid non-interleaved buffers page size {} must equal buffer size {}. For interleaved-buffers page size should be divisible by buffer size", page_size, size);
-    TT_FATAL(page_size % sizeof(uint32_t) == 0, "Page size must be divisible by sizeof(uint32_t) because buffers hold uint32_t values");
-    if(buffer_layout == TensorMemoryLayout::SINGLE_BANK){
-        TT_ASSERT(page_size == size , "Continguous buffer must be one contiguous page");
-    }
-    else if(is_sharded(buffer_layout)){
-        TT_ASSERT(shard_parameters != std::nullopt , "Sharded buffers must have a core grid assigned");
+    TT_FATAL(
+        valid_page_size,
+        "For valid non-interleaved buffers page size {} must equal buffer size {}. For interleaved buffers, buffer size "
+        "should be divisible by page size",
+        page_size,
+        size);
+    TT_FATAL(
+        page_size % sizeof(uint32_t) == 0,
+        "Page size must be divisible by sizeof(uint32_t) because buffers hold uint32_t values");
+    if (buffer_layout == TensorMemoryLayout::SINGLE_BANK) {
+        TT_ASSERT(page_size == size, "Contiguous buffer must be one contiguous page");
+    } else if (is_sharded(buffer_layout)) {
+        TT_ASSERT(shard_parameters != std::nullopt, "Sharded buffers must have a core grid assigned");
     }
 }
 
-
 inline std::tuple<std::vector<std::vector<uint32_t>>, std::vector<std::vector<uint32_t>>> core_to_host_pages(
     const uint32_t &total_pages,
     const uint32_t &pages_per_shard,
@@ -105,12 +113,20 @@ inline std::tuple>, std::vector
 
-Buffer::Buffer(Device *device, uint64_t size, uint64_t page_size, const BufferType buffer_type,
-    const TensorMemoryLayout buffer_layout, std::optional<ShardSpecBuffer> shard_parameters,
-    bool allocate)
-    : device_(device), size_(size), page_size_(page_size), buffer_type_(buffer_type), buffer_layout_(buffer_layout), shard_parameters_(shard_parameters) {
+Buffer::Buffer(
+    Device *device,
+    uint64_t size,
+    uint64_t page_size,
+    const BufferType buffer_type,
+    const TensorMemoryLayout buffer_layout,
+    std::optional<ShardSpecBuffer> shard_parameters,
+    bool allocate) :
+    device_(device),
+    size_(size),
+    page_size_(page_size),
+    buffer_type_(buffer_type),
+    buffer_layout_(buffer_layout),
+    shard_parameters_(shard_parameters) {
     TT_FATAL(this->device_ != nullptr and this->device_->allocator_ != nullptr);
     validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
     if (allocate) {
@@ -118,8 +134,6 @@ Buffer::Buffer(Device *device, uint64_t size, uint64_t page_size, const BufferTy
     }
 }
 
-
-
 BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer) {
     BufferPageMapping buffer_page_mapping;
     bool row_major = buffer.shard_spec().orientation() == ShardOrientation::ROW_MAJOR;
@@ -128,7 +142,7 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer) {
     buffer_page_mapping.all_cores_ = corerange_to_cores(buffer.shard_spec().grid(), num_cores, row_major);
     TT_ASSERT(num_cores == buffer_page_mapping.all_cores_.size());
     uint32_t core_id = 0;
-    for (const auto& core : buffer_page_mapping.all_cores_) {
+    for (const auto &core : buffer_page_mapping.all_cores_) {
         buffer_page_mapping.core_to_core_id_.insert({core, core_id});
         core_id++;
     }
@@ -180,7 +194,6 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer) {
     return buffer_page_mapping;
 }
 
-
 Buffer::Buffer(const Buffer &other) :
     device_(other.device_),
     size_(other.size_),
@@ -204,9 +217,16 @@ Buffer &Buffer::operator=(const Buffer &other) {
     return *this;
 }
 
-Buffer::Buffer(Buffer &&other) : device_(other.device_), size_(other.size_), address_(other.address_), page_size_(other.page_size_), buffer_type_(other.buffer_type_) ,
-    buffer_layout_(other.buffer_layout_), shard_parameters_(other.shard_parameters_) {
-    // Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is transferred to `this`
+Buffer::Buffer(Buffer &&other) :
+    device_(other.device_),
+    size_(other.size_),
+    address_(other.address_),
+    page_size_(other.page_size_),
+    buffer_type_(other.buffer_type_),
+    buffer_layout_(other.buffer_layout_),
+    shard_parameters_(other.shard_parameters_) {
+    // Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is
+    // transferred to `this`
     other.device_ = nullptr;
 }
 
@@ -219,7 +239,8 @@ Buffer &Buffer::operator=(Buffer &&other) {
         this->buffer_type_ = other.buffer_type_;
         this->buffer_layout_ = other.buffer_layout_;
         this->shard_parameters_ = other.shard_parameters_;
-        // Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is transferred to `this`
+        // Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is
+        // transferred to `this`
         other.device_ = nullptr;
     }
     return *this;
 }
@@ -257,15 +278,12 @@ CoreCoord Buffer::noc_coordinates(uint32_t bank_id) const {
         case BufferType::SYSTEM_MEMORY: {
             TT_THROW("Host buffer is located in system memory! Cannot retrieve NoC coordinates for it");
         } break;
-        default:
-            TT_ASSERT(false && "Unsupported buffer type!");
+        default: TT_ASSERT(false && "Unsupported buffer type!");
     }
     return CoreCoord{0, 0};
 }
 
-CoreCoord Buffer::noc_coordinates() const {
-    return this->noc_coordinates(0);
-}
+CoreCoord Buffer::noc_coordinates() const { return this->noc_coordinates(0); }
 
 uint64_t Buffer::page_address(uint32_t bank_id, uint32_t page_index) const {
     auto num_banks = this->device_->num_banks(this->buffer_type_);
@@ -301,9 +319,7 @@ void Buffer::deallocate() {
     detail::DeallocateBuffer(this);
 }
 
-Buffer::~Buffer() {
-    this->deallocate();
-}
+Buffer::~Buffer() { this->deallocate(); }
 
 tt::stl::reflection::Attributes ShardSpec::attributes() const {
     return {
@@ -314,7 +330,7 @@ tt::stl::reflection::Attributes ShardSpec::attributes() const {
     };
 }
 
-bool operator==(const ShardSpec& spec_a, const ShardSpec& spec_b) {
+bool operator==(const ShardSpec &spec_a, const ShardSpec &spec_b) {
     if (spec_a.grid != spec_b.grid) {
         return false;
     }
@@ -330,8 +346,10 @@ bool operator==(const ShardSpec& spec_a, const ShardSpec& spec_b) {
     return true;
 }
 
-bool operator!=(const ShardSpec& spec_a, const ShardSpec& spec_b) {
-    return not (spec_a == spec_b);
+bool operator!=(const ShardSpec &spec_a, const ShardSpec &spec_b) { return not(spec_a == spec_b); }
+
+namespace detail {
+buffer_map_t BUFFER_MAP = {};
+}
 
 }  // namespace tt_metal
 
diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp
index 3ed7829c2cc..48feb992ff4 100644
--- a/tt_metal/impl/buffers/buffer.hpp
+++ b/tt_metal/impl/buffers/buffer.hpp
@@ -4,18 +4,18 @@
 
 #pragma once
 
-#include "common/tt_backend_api_types.hpp"
-#include "common/core_coord.h"
+#include
+#include
+#include
+
 #include "common/bfloat16.hpp"
+#include "common/core_coord.h"
+#include "common/tt_backend_api_types.hpp"
 #include "hostdevcommon/common_values.hpp"
 #include "tt_metal/common/constants.hpp"
 #include "tt_metal/common/math.hpp"
 #include "tt_metal/tt_stl/concepts.hpp"
 #include "tt_metal/tt_stl/reflection.hpp"
-#include "tt_metal/common/math.hpp"
-#include
-#include
-#include
 
 namespace tt {
 
@@ -43,82 +43,71 @@ enum class ShardOrientation {
     COL_MAJOR,
 };
 
-
 struct ShardSpec {
     CoreRangeSet grid;
     std::array<uint32_t, 2> shape;
     ShardOrientation orientation = ShardOrientation::ROW_MAJOR;
     bool halo = false;
 
-    ShardSpec(const CoreRangeSet & core_sets_,
-        const std::array<uint32_t, 2> & shard_shape_,
-        const ShardOrientation & shard_orientation_ = ShardOrientation::ROW_MAJOR,
-        const bool & halo_ = false):
-        grid(core_sets_), shape(shard_shape_),
-        orientation(shard_orientation_), halo(halo_)
-        {;}
+    ShardSpec(
+        const CoreRangeSet &core_sets_,
+        const std::array<uint32_t, 2> &shard_shape_,
+        const ShardOrientation &shard_orientation_ = ShardOrientation::ROW_MAJOR,
+        const bool &halo_ = false) :
+        grid(core_sets_), shape(shard_shape_), orientation(shard_orientation_), halo(halo_) {
+        ;
+    }
 
-    const uint32_t num_cores() const {return this->grid.num_cores();}
+    const uint32_t num_cores() const { return this->grid.num_cores(); }
     const uint32_t numel() const { return this->shape[0] * this->shape[1]; }
 
     tt::stl::reflection::Attributes attributes() const;
-
 };
 
-bool operator==(const ShardSpec& spec_a, const ShardSpec& spec_b);
-bool operator!=(const ShardSpec& spec_a, const ShardSpec& spec_b);
+bool operator==(const ShardSpec &spec_a, const ShardSpec &spec_b);
+bool operator!=(const ShardSpec &spec_a, const ShardSpec &spec_b);
 
 struct ShardSpecBuffer {
     ShardSpec tensor_shard_spec;
     std::array<uint32_t, 2> page_shape;
-    std::array<uint32_t, 2> tensor2d_shape;
-    ShardSpecBuffer(const CoreRangeSet & core_sets_,
-        const std::array<uint32_t, 2> & shard_shape_,
-        const ShardOrientation & shard_orientation_,
-        const bool & halo_,
-        const std::array<uint32_t, 2> & page_shape,
-        const std::array<uint32_t, 2> & tensor2d_shape
-    ): tensor_shard_spec(core_sets_, shard_shape_, shard_orientation_, halo_)
-    {
-        this->page_shape = page_shape;
-        this->tensor2d_shape = tensor2d_shape;
-    }
+    std::array<uint32_t, 2> tensor2d_shape;
     ShardSpecBuffer(
-        const ShardSpec & shard_spec,
-        const std::array<uint32_t, 2> & page_shape,
-        const std::array<uint32_t, 2> & tensor2d_shape
-    ): tensor_shard_spec(shard_spec)
-    {
-        this->page_shape = page_shape;
-        this-> tensor2d_shape = tensor2d_shape;
-    }
-    CoreRangeSet grid() const {
-        return tensor_shard_spec.grid;
+        const CoreRangeSet &core_sets_,
+        const std::array<uint32_t, 2> &shard_shape_,
+        const ShardOrientation &shard_orientation_,
+        const bool &halo_,
+        const std::array<uint32_t, 2> &page_shape,
+        const std::array<uint32_t, 2> &tensor2d_shape) :
+        tensor_shard_spec(core_sets_, shard_shape_, shard_orientation_, halo_) {
+        this->page_shape = page_shape;
+        this->tensor2d_shape = tensor2d_shape;
     }
-    std::array<uint32_t, 2> shape() const {
-        return tensor_shard_spec.shape;
-    }
-    ShardOrientation orientation() const {
-        return tensor_shard_spec.orientation;
-    }
-    bool halo() const {
-        return tensor_shard_spec.halo;
+    ShardSpecBuffer(
+        const ShardSpec &shard_spec,
+        const std::array<uint32_t, 2> &page_shape,
+        const std::array<uint32_t, 2> &tensor2d_shape) :
+        tensor_shard_spec(shard_spec) {
+        this->page_shape = page_shape;
+        this->tensor2d_shape = tensor2d_shape;
     }
+    CoreRangeSet grid() const { return tensor_shard_spec.grid; }
+    std::array<uint32_t, 2> shape() const { return tensor_shard_spec.shape; }
+    ShardOrientation orientation() const { return tensor_shard_spec.orientation; }
+    bool halo() const { return tensor_shard_spec.halo; }
     std::array<uint32_t, 2> shape_in_pages() const {
         auto width_in_pages = tensor_shard_spec.shape[0] / page_shape[0];
         auto height_in_pages = tensor_shard_spec.shape[1] / page_shape[1];
         return {width_in_pages, height_in_pages};
     }
-    uint32_t size() const{
+    uint32_t size() const {
         auto shape_in_pages_ = this->shape_in_pages();
         return shape_in_pages_[0] * shape_in_pages_[1];
     }
 };
 
-
 struct BufferConfig {
     Device *device;
-    uint64_t size; // Size in bytes
-    uint64_t page_size; // Size of unit being interleaved. For non-interleaved buffers: size == page_size
+    uint64_t size;       // Size in bytes
+    uint64_t page_size;  // Size of unit being interleaved. For non-interleaved buffers: size == page_size
     BufferType buffer_type;
     TensorMemoryLayout buffer_layout = TensorMemoryLayout::INTERLEAVED;
 };
@@ -129,32 +118,29 @@ typedef BufferConfig InterleavedBufferConfig;
 
 // designator constructor
 struct ShardedBufferConfig {
     Device *device;
-    uint64_t size; // Size in bytes
-    uint64_t page_size; // Size of unit being interleaved. For non-interleaved buffers: size == page_size
+    uint64_t size;       // Size in bytes
+    uint64_t page_size;  // Size of unit being interleaved. For non-interleaved buffers: size == page_size
     BufferType buffer_type = BufferType::L1;
     TensorMemoryLayout buffer_layout = TensorMemoryLayout::HEIGHT_SHARDED;
     ShardSpecBuffer shard_parameters;
 };
 
-bool is_sharded(const TensorMemoryLayout & layout);
+bool is_sharded(const TensorMemoryLayout &layout);
 
 struct BufferPageMapping {
-    std::vector< CoreCoord> all_cores_;
-    std::vector< uint32_t> core_bank_indices_;
-    std::vector< std::vector<uint32_t> > core_host_page_indices_;
+    std::vector<CoreCoord> all_cores_;
+    std::vector<uint32_t> core_bank_indices_;
+    std::vector<std::vector<uint32_t>> core_host_page_indices_;
     std::vector<uint32_t> dev_page_to_core_mapping_;
-    //some dev pages don't have mapping to host (in case of padding)
-    std::vector< std::optional<uint32_t> > dev_page_to_host_page_mapping_;
+    // some dev pages don't have mapping to host (in case of padding)
+    std::vector<std::optional<uint32_t>> dev_page_to_host_page_mapping_;
     std::vector<uint32_t> host_page_to_dev_page_mapping_;
     std::unordered_map<CoreCoord, uint32_t> core_to_core_id_;
-    std::vector< uint32_t> host_page_to_local_shard_page_mapping_;
-    std::vector < std::array<uint32_t, 2> > core_shard_shape_;
-
+    std::vector<uint32_t> host_page_to_local_shard_page_mapping_;
+    std::vector<std::array<uint32_t, 2>> core_shard_shape_;
 };
 
-
-
 class Buffer {
    public:
     Buffer() :
@@ -163,16 +149,20 @@ class Buffer {
         buffer_layout_(TensorMemoryLayout::INTERLEAVED),
         shard_parameters_(std::nullopt) {}
 
-    Buffer(Device *device, uint64_t size, uint64_t page_size, const BufferType buffer_type,
-        const TensorMemoryLayout buffer_layout=TensorMemoryLayout::INTERLEAVED,
+    Buffer(
+        Device *device,
+        uint64_t size,
+        uint64_t page_size,
+        const BufferType buffer_type,
+        const TensorMemoryLayout buffer_layout = TensorMemoryLayout::INTERLEAVED,
         std::optional<ShardSpecBuffer> shard_parameter = std::nullopt,
         bool allocate = true);
 
     Buffer(const Buffer &other);
-    Buffer& operator=(const Buffer &other);
+    Buffer &operator=(const Buffer &other);
 
     Buffer(Buffer &&other);
-    Buffer& operator=(Buffer &&other);
+    Buffer &operator=(Buffer &&other);
 
     ~Buffer();
 
     Device *device() const { return device_; }
@@ -192,8 +182,7 @@ class Buffer {
     uint32_t num_dev_pages() const {
         if (!is_sharded(this->buffer_layout_)) {
             return this->num_pages();
-        }
-        else {
+        } else {
             return this->shard_spec().size() * this->num_cores();
         }
     }
@@ -216,22 +205,21 @@ class Buffer {
 
     uint64_t page_address(uint32_t bank_id, uint32_t page_index) const;
 
-
     // SHARDED API STARTS HERE
     // TODO: WILL SEPARATE INTO SHARDED BUFFER CLASS
     uint64_t sharded_page_address(uint32_t bank_id, uint32_t page_index) const;
 
     ShardSpecBuffer shard_spec() const {
-        TT_ASSERT(is_sharded(this->buffer_layout_) , "Buffer not sharded");
+        TT_ASSERT(is_sharded(this->buffer_layout_), "Buffer not sharded");
         TT_ASSERT(shard_parameters_.has_value());
         return this->shard_parameters_.value();
     }
 
-    uint32_t num_cores() const{
-        if(!is_sharded(this->buffer_layout_))
+    uint32_t num_cores() const {
+        if (!is_sharded(this->buffer_layout_))
             return 1;
-        else{
+        else {
             return this->shard_spec().tensor_shard_spec.grid.num_cores();
         }
     }
@@ -245,49 +233,48 @@ class Buffer {
     uint64_t translate_page_address(uint64_t offset, uint32_t bank_id) const;
 
     Device *device_;
-    uint64_t size_; // Size in bytes
-    uint64_t address_; // Address of buffer
-    uint64_t page_size_; // Size of unit being interleaved. For non-interleaved buffers: size == page_size
+    uint64_t size_;       // Size in bytes
+    uint64_t address_;    // Address of buffer
+    uint64_t page_size_;  // Size of unit being interleaved. For non-interleaved buffers: size == page_size
     BufferType buffer_type_;
     TensorMemoryLayout buffer_layout_;
     std::optional<ShardSpecBuffer> shard_parameters_;
 };
 
-
 BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer);
 
 namespace detail {
 using PageAddress = uint32_t;
 using Deviceid = uint32_t;
 
-class buffer_map {
-  public:
-   void insert(std::tuple<Deviceid, PageAddress> buf_attr, Buffer * buffer) {
-       std::scoped_lock lock(this->map_mutex);
-       this->map.insert({buf_attr, buffer});
-   }
+class buffer_map_t {
+   public:
+    void insert(std::tuple<Deviceid, PageAddress> buf_attr, Buffer *buffer) {
+        std::scoped_lock lock(this->map_mutex);
+        this->map.insert({buf_attr, buffer});
+    }
 
-   void erase(std::tuple<Deviceid, PageAddress> buf_attr) {
-       std::scoped_lock lock(this->map_mutex);
-       this->map.erase(buf_attr);
-   }
+    void erase(std::tuple<Deviceid, PageAddress> buf_attr) {
+        std::scoped_lock lock(this->map_mutex);
+        this->map.erase(buf_attr);
+    }
 
-   void clear() {
-       std::scoped_lock lock(this->map_mutex);
-       this->map.clear();
-   }
+    void clear() {
+        std::scoped_lock lock(this->map_mutex);
+        this->map.clear();
+    }
 
-   std::map<std::tuple<Deviceid, PageAddress>, Buffer *> value() {
-       std::scoped_lock lock(this->map_mutex);
-       return this->map;
-   }
+    std::map<std::tuple<Deviceid, PageAddress>, Buffer *> value() {
+        std::scoped_lock lock(this->map_mutex);
+        return this->map;
+    }
 
-  private:
-   std::mutex map_mutex;
-   std::map<std::tuple<Deviceid, PageAddress>, Buffer *> map = {};
+   private:
+    std::mutex map_mutex;
+    std::map<std::tuple<Deviceid, PageAddress>, Buffer *> map = {};
 };
 
-inline buffer_map BUFFER_MAP;
+extern buffer_map_t BUFFER_MAP;
 
 }  // namespace detail
 
 using HostDataType = std::variant<
     const std::shared_ptr>,
    const std::shared_ptr>,
     const std::shared_ptr>,
     const std::shared_ptr>,
-    const void*>;
+    const void *>;
 
 }  // namespace tt_metal
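
--
Note on the pattern applied above (not part of the diff): a C++17 `inline`
variable at namespace scope, such as the old `inline TensorPrintProfile
TTNN_TENSOR_PRINT_PROFILE`, is defined in every translation unit that includes
the header, and the linker is left to merge the copies. The patch switches each
such global (TTNN_TENSOR_PRINT_PROFILE, OPERATION_HISTORY, BUFFER_MAP) to an
`extern` declaration in the header plus exactly one definition in a .cpp file.
A minimal sketch of the same declaration/definition split, using hypothetical
names (`LogLevel`, `LOG_LEVEL`, `config.hpp`, `config.cpp`) rather than code
from this repository:

    // config.hpp
    #pragma once

    enum class LogLevel { Short, Full };

    // Before: `inline LogLevel LOG_LEVEL = LogLevel::Short;` here would
    // define the variable in every translation unit that includes this
    // header. After: the header only declares it.
    extern LogLevel LOG_LEVEL;

    // config.cpp -- the single definition lives in one translation unit.
    #include "config.hpp"
    LogLevel LOG_LEVEL = LogLevel::Short;

    // any_user.cpp -- every user now refers to the same object.
    #include "config.hpp"
    bool print_everything() { return LOG_LEVEL == LogLevel::Full; }

One plausible motivation (an assumption; the commit message does not say): when
the same header is compiled into several shared libraries with hidden symbol
visibility, each library can end up with its own copy of an `inline` variable,
so a value set through one library is not observed through another. A single
`extern` definition guarantees one object with one address program-wide.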