diff --git a/tt_eager/tt_dnn/kernels/compute/reduce_h.cpp b/tt_eager/tt_dnn/kernels/compute/reduce_h.cpp index c554610b5ff..3620325041e 100644 --- a/tt_eager/tt_dnn/kernels/compute/reduce_h.cpp +++ b/tt_eager/tt_dnn/kernels/compute/reduce_h.cpp @@ -4,8 +4,6 @@ #include -#include "debug/dprint.h" - #include "compute_kernel_api/reduce.h" namespace NAMESPACE { diff --git a/tt_eager/tt_dnn/kernels/compute/reduce_hw.cpp b/tt_eager/tt_dnn/kernels/compute/reduce_hw.cpp index e6b39ab3b41..83fdc0a01fe 100644 --- a/tt_eager/tt_dnn/kernels/compute/reduce_hw.cpp +++ b/tt_eager/tt_dnn/kernels/compute/reduce_hw.cpp @@ -4,8 +4,6 @@ #include -#include "debug/dprint.h" - #include "compute_kernel_api/reduce.h" namespace NAMESPACE { diff --git a/tt_eager/tt_dnn/kernels/compute/reduce_w.cpp b/tt_eager/tt_dnn/kernels/compute/reduce_w.cpp index 3c1e08287c3..2c77aeacaec 100644 --- a/tt_eager/tt_dnn/kernels/compute/reduce_w.cpp +++ b/tt_eager/tt_dnn/kernels/compute/reduce_w.cpp @@ -4,8 +4,6 @@ #include -#include "debug/dprint.h" - #include "compute_kernel_api/reduce.h" namespace NAMESPACE { diff --git a/tt_eager/tt_dnn/kernels/dataflow/reader_binary_dtx.cpp b/tt_eager/tt_dnn/kernels/dataflow/reader_binary_dtx.cpp index b958fa74f16..aeab6c49252 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reader_binary_dtx.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reader_binary_dtx.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" + inline void noc_async_read_from_dram_to_l1(uint32_t dram_addr, uint32_t dram_noc_x, uint32_t dram_noc_y, uint32_t l1_dest_addr, uint32_t read_size) { uint64_t src_noc_addr = get_noc_addr(dram_noc_x, dram_noc_y, dram_addr); noc_async_read(src_noc_addr, l1_dest_addr, read_size); diff --git a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank.cpp b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank.cpp index ead8939d023..815adab1598 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank.cpp @@ -5,7 +5,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { // same arg indices as in reader_binary_diff_lenghts for compat diff --git a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank_output_tiles_partitioned.cpp b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank_output_tiles_partitioned.cpp index b271abac511..3fd7ae84a91 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank_output_tiles_partitioned.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank_output_tiles_partitioned.cpp @@ -5,8 +5,6 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" - void kernel_main() { // same arg indices as in reader_binary_diff_lenghts for compat uint32_t src0_addr = get_arg_val(0); diff --git a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core.cpp b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core.cpp index bc5d6c56549..7a58870fe5f 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core.cpp @@ -5,7 +5,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" #ifdef FUSE_BIAS #include "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp" diff --git a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp index 8eed70b051d..40616695905 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp @@ -5,7 +5,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" template diff --git a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_tilize_untilize.cpp b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_tilize_untilize.cpp index 9fba5c6afdd..dbcfaea7810 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_tilize_untilize.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_tilize_untilize.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" #ifdef FUSE_BIAS #include "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp" diff --git a/tt_eager/tt_dnn/kernels/dataflow/reader_tm_tile_layout_split_two_chunks.cpp b/tt_eager/tt_dnn/kernels/dataflow/reader_tm_tile_layout_split_two_chunks.cpp index 67dcd519867..974466025ed 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reader_tm_tile_layout_split_two_chunks.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reader_tm_tile_layout_split_two_chunks.cpp @@ -8,7 +8,7 @@ //#define DEBUG #ifdef DEBUG -#include "debug/dprint.h" +// #include "debug/dprint.h" #endif void kernel_main() { diff --git a/tt_eager/tt_dnn/kernels/dataflow/reshape_interleaved.cpp b/tt_eager/tt_dnn/kernels/dataflow/reshape_interleaved.cpp index 230c83de194..f14ab22ded4 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/reshape_interleaved.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/reshape_interleaved.cpp @@ -5,7 +5,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" using uint32_t = std::uint32_t; diff --git a/tt_eager/tt_dnn/kernels/dataflow/writer_bmm_single_core_tiled.cpp b/tt_eager/tt_dnn/kernels/dataflow/writer_bmm_single_core_tiled.cpp index 080913d1a1e..fc28fe62051 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/writer_bmm_single_core_tiled.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/writer_bmm_single_core_tiled.cpp @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { // This writer is for output tensor in tile format diff --git a/tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_8bank_blocks.cpp b/tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_8bank_blocks.cpp index d9fd8948880..0a28fd5a5d5 100644 --- a/tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_8bank_blocks.cpp +++ b/tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_8bank_blocks.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" template inline void write_tiles_in_block(uint32_t cb_id_out0, diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv1x1_activations_fast_for_col_major_conv_out_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv1x1_activations_fast_for_col_major_conv_out_blocks.cpp index 8d86e13128e..8d639f6af0a 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv1x1_activations_fast_for_col_major_conv_out_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv1x1_activations_fast_for_col_major_conv_out_blocks.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" inline void pad_l1_buffer_with_zeroes(uint32_t l1_addr, uint32_t pad_size_bytes) { volatile std::uint32_t* dst = reinterpret_cast(l1_addr); diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations.cpp index e915ae9256f..914b0473689 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" inline void pad_l1_buffer_with_zeroes(uint32_t l1_addr, uint32_t pad_size_bytes) { volatile std::uint32_t* dst = reinterpret_cast(l1_addr); diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_2d_mcast_padded_with_halo_3x3_weights.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_2d_mcast_padded_with_halo_3x3_weights.cpp index a5a1c2164ad..cf19d087188 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_2d_mcast_padded_with_halo_3x3_weights.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_2d_mcast_padded_with_halo_3x3_weights.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" FORCE_INLINE void read_channels(uint32_t& l1_write_addr_act, const uint32_t act_l1_read_addr, const uint32_t reader_channel_idx, diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_act_block_w_equals_channels_X_filter_width.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_act_block_w_equals_channels_X_filter_width.cpp index a062564cd4a..431fca5c2af 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_act_block_w_equals_channels_X_filter_width.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_act_block_w_equals_channels_X_filter_width.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" inline void pad_l1_buffer_with_zeroes(uint32_t l1_addr, uint32_t pad_size_bytes) { volatile std::uint32_t* dst = reinterpret_cast(l1_addr); diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast.cpp index ea7f0c8071a..2e6187dd584 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" inline void pad_l1_buffer_with_zeroes(uint32_t l1_addr, uint32_t pad_size_bytes) { volatile std::uint32_t* dst = reinterpret_cast(l1_addr); diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_for_col_major_conv_out_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_for_col_major_conv_out_blocks.cpp index 85cc5927d05..6374673b9c0 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_for_col_major_conv_out_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_for_col_major_conv_out_blocks.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" inline void pad_l1_buffer_with_zeroes(uint32_t l1_addr, uint32_t pad_size_bytes) { volatile std::uint32_t* dst = reinterpret_cast(l1_addr); diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_resnet50_first_conv.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_resnet50_first_conv.cpp index 3fd6e88c221..d96f7acb84f 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_resnet50_first_conv.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_resnet50_first_conv.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { uint32_t i = 0; diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_without_conv_padding.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_without_conv_padding.cpp index 87c86001bb9..9cec0f43c7f 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_without_conv_padding.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_fast_without_conv_padding.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" inline void pad_l1_buffer_with_zeroes(uint32_t l1_addr, uint32_t pad_size_bytes) { volatile std::uint32_t* dst = reinterpret_cast(l1_addr); diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_padded_with_halo_3x3_weights.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_padded_with_halo_3x3_weights.cpp index b6c03914a9e..5b41a25d49a 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_padded_with_halo_3x3_weights.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/reader_conv_activations_padded_with_halo_3x3_weights.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_tiled_out.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_tiled_out.cpp index 301e8620d56..dc3e3d79ddd 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_tiled_out.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_tiled_out.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" #ifdef FUSE_BIAS #include "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp" diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_untilize_out.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_untilize_out.cpp index 2ee8e09fef3..9004ba251d1 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_untilize_out.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_and_reader_weights_resnet50_first_conv_untilize_out.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" FORCE_INLINE void read_weight_blocks_inner_h_dim(uint32_t cb_id_weight, uint32_t num_blocks_weight_h, diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp index ec9776b31d8..4ddd2dbecff 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp index 98e0f015fa5..e3878564564 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_1d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp index 3c1594c049f..9f62d231859 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp index d602cc17fe9..fb108ed90ef 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_2d_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp index 1d04b125f02..213beb08606 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp index 0426853ca7c..b9892d485aa 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_receiver_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp index 37960d0b7e4..e18486ba2d3 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp index e13a646302c..ac7ff236989 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_mcast_sender_conv_weights_tiled_col_to_rm_blocks_num_blocks_weight_h_eq_1.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" void kernel_main() { diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled.cpp index 57722da293c..3b942ee2230 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" #ifdef FUSE_BIAS #include "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp" diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks.cpp index 878b4d1f74b..d891197708f 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" #ifdef FUSE_BIAS #include "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp" diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks_read_weight_slices_once.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks_read_weight_slices_once.cpp index df58c5e46b2..eddc68b0ac4 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks_read_weight_slices_once.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_tiled_out_reader_conv_weights_tiled_col_to_rm_blocks_read_weight_slices_once.cpp @@ -4,7 +4,7 @@ #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" #ifdef FUSE_BIAS #include "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_single_core_bias.hpp" diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_8bank_blocks_reader_weight_tile_with_pow2_addr_gen_fast.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_8bank_blocks_reader_weight_tile_with_pow2_addr_gen_fast.cpp index 4931dafaa74..82945621314 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_8bank_blocks_reader_weight_tile_with_pow2_addr_gen_fast.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_8bank_blocks_reader_weight_tile_with_pow2_addr_gen_fast.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" FORCE_INLINE void read_weight_blocks_inner_h_dim(uint32_t cb_id_weight, uint32_t num_blocks_weight_h, diff --git a/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_layout_8bank_blocks_reader_weight_tile_layout.cpp b/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_layout_8bank_blocks_reader_weight_tile_layout.cpp index 88c4b637296..dc6f878cb7c 100644 --- a/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_layout_8bank_blocks_reader_weight_tile_layout.cpp +++ b/tt_eager/tt_dnn/op_library/conv/kernels/writer_unary_stick_layout_8bank_blocks_reader_weight_tile_layout.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" // TODO: FORCE INLINE inline void read_weight_blocks_inner_h_dim(uint32_t cb_id_weight, diff --git a/tt_eager/tt_dnn/op_library/layernorm/kernels/dataflow/writer_unary_sharded_ln_rm_gb.cpp b/tt_eager/tt_dnn/op_library/layernorm/kernels/dataflow/writer_unary_sharded_ln_rm_gb.cpp index bd5f4ed69b5..5890e2ea1fc 100644 --- a/tt_eager/tt_dnn/op_library/layernorm/kernels/dataflow/writer_unary_sharded_ln_rm_gb.cpp +++ b/tt_eager/tt_dnn/op_library/layernorm/kernels/dataflow/writer_unary_sharded_ln_rm_gb.cpp @@ -5,7 +5,7 @@ #include #include "dataflow_api.h" #include "hostdevcommon/common_values.hpp" -#include "debug/dprint.h" +// #include "debug/dprint.h" FORCE_INLINE void generate_bcast_scaler_c() { constexpr uint32_t cb_in_4 = tt::CB::c_in4; diff --git a/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp b/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp index 50edeeb551b..d423494f3c8 100644 --- a/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp @@ -5,7 +5,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" // SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 32, .ws = 1 }; // SliceRange srt = SliceRange{ .h0 = 0, .h1 = 4, .hs = 1, .w0 = 0, .w1 = 8, .ws = 1 }; @@ -127,8 +127,8 @@ void kernel_main() { // NOTE: batch is folded in - DPRINT << "NOC coords 0: " << (uint) my_x[0] << "," << (uint) my_y[0] << ENDL(); - DPRINT << "NOC coords 1: " << (uint) my_x[1] << "," << (uint) my_y[1] << ENDL(); + // DPRINT << "NOC coords 0: " << (uint) my_x[0] << "," << (uint) my_y[0] << ENDL(); + // DPRINT << "NOC coords 1: " << (uint) my_x[1] << "," << (uint) my_y[1] << ENDL(); uint32_t core_out_w_i_start = get_arg_val(38); uint32_t core_out_h_i_start = get_arg_val(39); diff --git a/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp b/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp index 4a30663d869..80075c25de4 100644 --- a/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp +++ b/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp @@ -6,10 +6,10 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" +// #include "debug/dprint.h" -SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 32, .ws = 1 }; -SliceRange srt = SliceRange{ .h0 = 0, .h1 = 16, .hs = 1, .w0 = 0, .w1 = 2, .ws = 1 }; +// SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 32, .ws = 1 }; +// SliceRange srt = SliceRange{ .h0 = 0, .h1 = 16, .hs = 1, .w0 = 0, .w1 = 2, .ws = 1 }; // inline void print_full_tile(uint32_t cb_id, uint32_t tile_id = 0, bool untilize = false) { // DPRINT << "======" << ENDL(); @@ -20,16 +20,16 @@ SliceRange srt = SliceRange{ .h0 = 0, .h1 = 16, .hs = 1, .w0 = 0, .w1 = 2, .ws = // DPRINT << "++++++" << ENDL(); // } -inline void print_pages(uint32_t l1_addr, uint32_t pagelen, uint32_t npages, uint32_t start = 0) { - volatile tt_l1_ptr uint16_t* ptr = reinterpret_cast(l1_addr) + start * pagelen; - for (uint32_t page = 0; page < npages; ++ page) { - DPRINT << start + page << ": "; - for (uint32_t j = 0; j < pagelen; ++ j, ++ ptr) { - DPRINT << BF16(*ptr) << " "; - } - DPRINT << ENDL(); - } -} +// inline void print_pages(uint32_t l1_addr, uint32_t pagelen, uint32_t npages, uint32_t start = 0) { +// volatile tt_l1_ptr uint16_t* ptr = reinterpret_cast(l1_addr) + start * pagelen; +// for (uint32_t page = 0; page < npages; ++ page) { +// DPRINT << start + page << ": "; +// for (uint32_t j = 0; j < pagelen; ++ j, ++ ptr) { +// DPRINT << BF16(*ptr) << " "; +// } +// DPRINT << ENDL(); +// } +// } #define ALWI inline __attribute__((always_inline)) diff --git a/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp b/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp index faafdc73c76..cde53eae888 100644 --- a/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp @@ -5,8 +5,6 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" - // #include "debug/dprint.h" // SliceRange srt = SliceRange{ .h0 = 0, .h1 = 32, .hs = 8, .w0 = 0, .w1 = 32, .ws = 8 }; // SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 64, .ws = 2 }; diff --git a/tt_eager/tt_dnn/op_library/softmax/kernels/dataflow/reader_unary_sharded_sm_rm_mask.cpp b/tt_eager/tt_dnn/op_library/softmax/kernels/dataflow/reader_unary_sharded_sm_rm_mask.cpp index 21b65e63625..4d8d290d64e 100644 --- a/tt_eager/tt_dnn/op_library/softmax/kernels/dataflow/reader_unary_sharded_sm_rm_mask.cpp +++ b/tt_eager/tt_dnn/op_library/softmax/kernels/dataflow/reader_unary_sharded_sm_rm_mask.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "debug/dprint.h" +// #include "debug/dprint.h" #include "dataflow_api.h" FORCE_INLINE void generate_bcast_scaler() { diff --git a/tt_eager/tt_dnn/op_library/transformer_tms/compute/transpose_wh_sharded.cpp b/tt_eager/tt_dnn/op_library/transformer_tms/compute/transpose_wh_sharded.cpp index 8b078a89b7f..e3fb92f1eda 100644 --- a/tt_eager/tt_dnn/op_library/transformer_tms/compute/transpose_wh_sharded.cpp +++ b/tt_eager/tt_dnn/op_library/transformer_tms/compute/transpose_wh_sharded.cpp @@ -2,7 +2,6 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "debug/dprint.h" #include #include "compute_kernel_api/transpose_wh.h" diff --git a/tt_eager/tt_dnn/op_library/transformer_tms/dataflow/reader_tm_tile_layout_create_qkv_heads_sharded.cpp b/tt_eager/tt_dnn/op_library/transformer_tms/dataflow/reader_tm_tile_layout_create_qkv_heads_sharded.cpp index 2495129d050..cfab2446f58 100644 --- a/tt_eager/tt_dnn/op_library/transformer_tms/dataflow/reader_tm_tile_layout_create_qkv_heads_sharded.cpp +++ b/tt_eager/tt_dnn/op_library/transformer_tms/dataflow/reader_tm_tile_layout_create_qkv_heads_sharded.cpp @@ -2,7 +2,6 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "debug/dprint.h" #include #include "dataflow_api.h" diff --git a/tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp b/tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp index 79373c77488..0830335a9a1 100644 --- a/tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp @@ -5,8 +5,6 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" - using uint32_t = std::uint32_t; // tile index to address diff --git a/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo.cpp b/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo.cpp index 68e6218f407..8ca78c9926b 100644 --- a/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo.cpp @@ -5,21 +5,21 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" - -SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 32, .ws = 1 }; -SliceRange srt = SliceRange{ .h0 = 0, .h1 = 8, .hs = 1, .w0 = 0, .w1 = 4, .ws = 1 }; - -inline void print_sticks(uint32_t l1_addr, uint32_t stick_start, uint32_t nsticks, uint32_t stick_size = 64) { - for (uint32_t i = stick_start; i < stick_start + nsticks; ++ i) { - volatile tt_l1_ptr uint16_t* l1_ptr = reinterpret_cast(l1_addr + i * stick_size * 2); - DPRINT << i << ": "; - for (uint32_t j = 0; j < stick_size; ++ j) { - DPRINT << BF16(l1_ptr[j]) << " "; - } - DPRINT << ENDL(); - } -} +// #include "debug/dprint.h" + +// SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 32, .ws = 1 }; +// SliceRange srt = SliceRange{ .h0 = 0, .h1 = 8, .hs = 1, .w0 = 0, .w1 = 4, .ws = 1 }; + +// inline void print_sticks(uint32_t l1_addr, uint32_t stick_start, uint32_t nsticks, uint32_t stick_size = 64) { +// for (uint32_t i = stick_start; i < stick_start + nsticks; ++ i) { +// volatile tt_l1_ptr uint16_t* l1_ptr = reinterpret_cast(l1_addr + i * stick_size * 2); +// DPRINT << i << ": "; +// for (uint32_t j = 0; j < stick_size; ++ j) { +// DPRINT << BF16(l1_ptr[j]) << " "; +// } +// DPRINT << ENDL(); +// } +// } // Fill an L1 buffer with the given val inline bool fill_with_val(uint32_t begin_addr, uint32_t n, uint16_t val) { diff --git a/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo_s2.cpp b/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo_s2.cpp index 3cb62693521..839c8bab2fe 100644 --- a/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo_s2.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo_s2.cpp @@ -5,21 +5,21 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" - -SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 32, .ws = 1 }; -SliceRange srt = SliceRange{ .h0 = 0, .h1 = 8, .hs = 1, .w0 = 0, .w1 = 4, .ws = 1 }; - -inline void print_sticks(uint32_t l1_addr, uint32_t stick_start, uint32_t nsticks, uint32_t stick_size = 64) { - for (uint32_t i = stick_start; i < stick_start + nsticks; ++ i) { - volatile tt_l1_ptr uint16_t* l1_ptr = reinterpret_cast(l1_addr + i * stick_size * 2); - DPRINT << i << ": "; - for (uint32_t j = 0; j < stick_size; ++ j) { - DPRINT << BF16(l1_ptr[j]) << " "; - } - DPRINT << ENDL(); - } -} +// #include "debug/dprint.h" + +// SliceRange srr = SliceRange{ .h0 = 0, .h1 = 1, .hs = 8, .w0 = 0, .w1 = 32, .ws = 1 }; +// SliceRange srt = SliceRange{ .h0 = 0, .h1 = 8, .hs = 1, .w0 = 0, .w1 = 4, .ws = 1 }; + +// inline void print_sticks(uint32_t l1_addr, uint32_t stick_start, uint32_t nsticks, uint32_t stick_size = 64) { +// for (uint32_t i = stick_start; i < stick_start + nsticks; ++ i) { +// volatile tt_l1_ptr uint16_t* l1_ptr = reinterpret_cast(l1_addr + i * stick_size * 2); +// DPRINT << i << ": "; +// for (uint32_t j = 0; j < stick_size; ++ j) { +// DPRINT << BF16(l1_ptr[j]) << " "; +// } +// DPRINT << ENDL(); +// } +// } // Fill an L1 buffer with the given val inline bool fill_with_val(uint32_t begin_addr, uint32_t n, uint16_t val) { diff --git a/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_unpad_batch_rows_sharded.cpp b/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_unpad_batch_rows_sharded.cpp index 8384aaf3551..4458111ae79 100644 --- a/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_unpad_batch_rows_sharded.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_unpad_batch_rows_sharded.cpp @@ -4,7 +4,7 @@ #include #include "dataflow_api.h" -#include "debug/dprint.h" + void kernel_main() { // This kernel only supports unpadding the end rows of each batch uint32_t num_unpadded_output_rows = get_arg_val(0);