diff --git a/tests/tt_eager/python_api_testing/unit_testing/test_bert_ops.py b/tests/tt_eager/python_api_testing/unit_testing/test_bert_ops.py index d9a8e557c74..d4a931a48b3 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/test_bert_ops.py +++ b/tests/tt_eager/python_api_testing/unit_testing/test_bert_ops.py @@ -113,10 +113,10 @@ def test_bert_linear( # out_subblock_w = 4 # out_subblock_h = 4 - print("in0 block w h " + str(in0_block_w * 32) + " " + str(in0_block_h * 32)) - print("in1 block w h " + str(out_block_w * 32) + " " + str(in0_block_w * 32)) - print("out block w h " + str(out_block_w * 32) + " " + str(out_block_h * 32)) - print("out subblock w h " + str(out_subblock_w * 32) + " " + str(out_subblock_h * 32)) + logger.debug("in0 block w h " + str(in0_block_w * 32) + " " + str(in0_block_h * 32)) + logger.debug("in1 block w h " + str(out_block_w * 32) + " " + str(in0_block_w * 32)) + logger.debug("out block w h " + str(out_block_w * 32) + " " + str(out_block_h * 32)) + logger.debug("out subblock w h " + str(out_subblock_w * 32) + " " + str(out_subblock_h * 32)) interleaved_mem_config_L1 = ttl.tensor.MemoryConfig( memory_layout=ttl.tensor.TensorMemoryLayout.INTERLEAVED, @@ -297,10 +297,10 @@ def test_bert_linear_batch7( while out_block_w % out_subblock_w != 0: out_subblock_w = out_block_w // 2 - print("in0 block w h " + str(in0_block_w * 32) + " " + str(in0_block_h * 32)) - print("in1 block w h " + str(out_block_w * 32) + " " + str(in0_block_w * 32)) - print("out block w h " + str(out_block_w * 32) + " " + str(out_block_h * 32)) - print("out subblock w h " + str(out_subblock_w * 32) + " " + str(out_subblock_h * 32)) + logger.debug("in0 block w h " + str(in0_block_w * 32) + " " + str(in0_block_h * 32)) + logger.debug("in1 block w h " + str(out_block_w * 32) + " " + str(in0_block_w * 32)) + logger.debug("out block w h " + str(out_block_w * 32) + " " + str(out_block_h * 32)) + logger.debug("out subblock w h " + str(out_subblock_w * 32) + " " + str(out_subblock_h * 32)) interleaved_mem_config_L1 = ttl.tensor.MemoryConfig( memory_layout=ttl.tensor.TensorMemoryLayout.INTERLEAVED, diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp index 3b02137b2ce..3fc5af0ce1f 100644 --- a/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp @@ -89,13 +89,13 @@ operation::ProgramWithCallbacks bcast_multi_core_h(const Tensor &a, const Tensor program, reader_name, all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp", all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); std::map bcast_defines = bcast_op_utils::get_defines(bcast_dim, bcast_math); auto bcast_kernel_id = tt_metal::CreateKernel( diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp index 86dd099f4fa..379a8bf1ab5 100644 --- a/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp @@ -99,13 +99,13 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso program, reader_name, all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = reader_defines}); KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); auto bcast_kernel_id = tt_metal::CreateKernel( program, diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp index 8f9663b22dd..addca0b4386 100644 --- a/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp @@ -90,13 +90,13 @@ operation::ProgramWithCallbacks bcast_multi_core_w(const Tensor &a, const Tensor program, reader_name, all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp", all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); std::map bcast_defines = bcast_op_utils::get_defines(bcast_dim, bcast_math); auto bcast_kernel_id = tt_metal::CreateKernel( diff --git a/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp b/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp index 4be64942e7b..41f3cf37bfc 100644 --- a/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp @@ -81,13 +81,13 @@ operation::ProgramWithCallbacks bcast_single_core(const Tensor &a, const Tensor program, reader_name, core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); const char* compute_name = bcast_op_utils::get_compute_name(bcast_dim); std::map bcast_defines = bcast_op_utils::get_defines(bcast_dim, bcast_math); diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core/bmm_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core/bmm_op_multi_core.cpp index 92c9ad1b00f..8106cae01b3 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core/bmm_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core/bmm_op_multi_core.cpp @@ -90,13 +90,13 @@ operation::ProgramWithCallbacks matmul_multi_core(const Tensor &a, const Tensor program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank_output_tiles_partitioned.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto writer_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args_group_1 = { 1, // B diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse/bmm_op_multi_core_reuse.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse/bmm_op_multi_core_reuse.cpp index 0fa5b1e23ac..4294d122c54 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse/bmm_op_multi_core_reuse.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse/bmm_op_multi_core_reuse.cpp @@ -110,13 +110,13 @@ tt_metal::operation::ProgramWithCallbacks create_program( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // Create compute kernel auto mm_kernel_id = tt_metal::CreateKernel( diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_generalized/bmm_op_multi_core_reuse_generalized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_generalized/bmm_op_multi_core_reuse_generalized.cpp index a0aedcbeb34..9947d389022 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_generalized/bmm_op_multi_core_reuse_generalized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_generalized/bmm_op_multi_core_reuse_generalized.cpp @@ -115,13 +115,13 @@ tt_metal::operation::ProgramWithCallbacks create_program( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // Create compute kernel auto mm_kernel_id = tt_metal::CreateKernel( diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp index 07c8a5b0928..8dd8dc03777 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp @@ -12,6 +12,7 @@ #include "tt_metal/common/constants.hpp" #include "hostdevcommon/common_values.hpp" #include "tt_metal/detail/util.hpp" +#include "tt_metal/detail/tt_metal.hpp" using namespace tt::constants; using namespace tt; @@ -247,17 +248,22 @@ operation::ProgramWithCallbacks create_program_mcast_in0( } mm_kernel_in1_sender_writer_defines["SKIP_MCAST"] = "1"; + + // in1 is the reader of weights/output writer, and we choose to make it use the optimized reader noc + tt_metal::NOC in0_noc = detail::GetPreferredNOCForDRAMWrite(device->arch()); + tt_metal::NOC in1_noc = detail::GetPreferredNOCForDRAMRead(device->arch()); + auto mm_kernel_in0_sender_id = tt_metal::CreateKernel( program, in0_is_sharded ? "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_sender_receiver_padding_block_sharded.cpp" : "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp", mcast_sender, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = in0_sender_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = in0_noc, .compile_args = in0_sender_compile_time_args}); auto mm_kernel_in1_sender_writer_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = in1_noc, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines}); KernelHandle mm_kernel_in0_receiver_id = 0; @@ -265,9 +271,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0( mm_kernel_in0_receiver_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_receiver.cpp", - /* in0_receiver_in1_sender, // If not using half-half noc setup */ mcast_receivers, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = in0_receiver_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = in0_noc, .compile_args = in0_receiver_compile_time_args}); } // Compute kernel compile time args @@ -374,8 +379,11 @@ operation::ProgramWithCallbacks create_program_mcast_in0( in0_mcast_noc_y.push_back(device->worker_core_from_logical_core({0, core_idx_y}).y); } } - CoreCoord start_core_noc = device->worker_core_from_logical_core({start_core_x, start_core_y}); - CoreCoord end_core_noc = device->worker_core_from_logical_core({start_core_x + num_cores_c - 1, start_core_y + num_cores_r - 1}); + CoreCoord start_core_noc = top_left_core_physical; + CoreCoord end_core_noc = bottom_right_core_physical; + if (in0_noc == NOC::NOC_1) { + std::swap(start_core_noc, end_core_noc); + } for(uint32_t i = 0; i < num_cores; i++) { uint32_t core_idx_x = i % num_cores_c; @@ -404,10 +412,10 @@ operation::ProgramWithCallbacks create_program_mcast_in0( (std::uint32_t) in0_buffer->address(), (std::uint32_t) K * per_core_M * output_idx_y, // in0_tensor_start_tile_id // in0 mcast args - (std::uint32_t) top_left_core_physical.x, // in0_mcast_dest_noc_start_x - (std::uint32_t) top_left_core_physical.y, // in0_mcast_dest_noc_start_y - (std::uint32_t) bottom_right_core_physical.x, // in0_mcast_dest_noc_end_x - (std::uint32_t) bottom_right_core_physical.y, // in0_mcast_dest_noc_end_y + (std::uint32_t) start_core_noc.x, // in0_mcast_dest_noc_start_x + (std::uint32_t) start_core_noc.y, // in0_mcast_dest_noc_start_y + (std::uint32_t) end_core_noc.x, // in0_mcast_dest_noc_end_x + (std::uint32_t) end_core_noc.y, // in0_mcast_dest_noc_end_y // padding args (std::uint32_t) per_core_M // last_block_h @@ -767,24 +775,29 @@ operation::ProgramWithCallbacks create_program_mcast_in1( } mm_kernel_in0_sender_defines["SKIP_MCAST"] = "1"; + + // in1 is the reader of weights/output writer, and we choose to make it use the optimized reader noc + tt_metal::NOC in0_noc = detail::GetPreferredNOCForDRAMWrite(device->arch()); + tt_metal::NOC in1_noc = detail::GetPreferredNOCForDRAMRead(device->arch()); + auto mm_kernel_in0_sender_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = in0_sender_compile_time_args, .defines = mm_kernel_in0_sender_defines}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = in0_noc, .compile_args = in0_sender_compile_time_args, .defines = mm_kernel_in0_sender_defines}); auto mm_kernel_in1_sender_writer_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp", mcast_sender, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = in1_noc, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines}); auto mm_kernel_in1_receiver_writer_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in1_receiver_writer_padding.cpp", mcast_receivers, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_receiver_writer_compile_time_args, .defines = mm_kernel_in1_receiver_writer_defines}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = in1_noc, .compile_args = in1_receiver_writer_compile_time_args, .defines = mm_kernel_in1_receiver_writer_defines}); // Compute kernel compile time args @@ -875,6 +888,13 @@ operation::ProgramWithCallbacks create_program_mcast_in1( std::vector reader_kernel_ids; std::vector writer_kernel_ids; + + CoreCoord start_core_noc = bottom_right_core_physical; + CoreCoord end_core_noc = top_left_core_physical; + if (in1_noc == NOC::NOC_0) { + std::swap(start_core_noc, end_core_noc); + } + for(uint32_t i = 0; i < num_cores; i++) { uint32_t core_idx_x = i % num_cores_c; uint32_t core_idx_y = i / num_cores_c; @@ -890,10 +910,10 @@ operation::ProgramWithCallbacks create_program_mcast_in1( (std::uint32_t) in1_buffer->address(), (std::uint32_t) per_core_N * output_idx_x, //in1_tensor_start_tile_id // in1 mcast args - (std::uint32_t) bottom_right_core_physical.x, // in1_mcast_dest_noc_start_x - (std::uint32_t) bottom_right_core_physical.y, // in1_mcast_dest_noc_start_y - (std::uint32_t) top_left_core_physical.x, // in1_mcast_dest_noc_end_x - (std::uint32_t) top_left_core_physical.y, // in1_mcast_dest_noc_end_y + (std::uint32_t) start_core_noc.x, // in1_mcast_dest_noc_start_x + (std::uint32_t) start_core_noc.y, // in1_mcast_dest_noc_start_y + (std::uint32_t) end_core_noc.x, // in1_mcast_dest_noc_end_x + (std::uint32_t) end_core_noc.y, // in1_mcast_dest_noc_end_y // WRITER // out tensor args diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp index d30c76d6869..30ad52dfd5e 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp @@ -11,6 +11,7 @@ #include "tt_metal/common/constants.hpp" #include "hostdevcommon/common_values.hpp" #include "tt_metal/detail/util.hpp" +#include "tt_metal/detail/tt_metal.hpp" using namespace tt::constants; using namespace tt; @@ -114,16 +115,15 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( uint32_t in0_end = num_cores_r - 1; uint32_t in1_end = num_cores_c - 1; - tt_metal::NOC in0_noc = tt_metal::NOC::RISCV_0_default; - tt_metal::NOC in1_noc = tt_metal::NOC::RISCV_1_default; - tt_metal::NOC in0_split_noc = tt_metal::NOC::RISCV_1_default; - tt_metal::NOC in1_split_noc = tt_metal::NOC::RISCV_0_default; + // in1 is the reader of weights/output writer, and we choose to make it use the optimized reader noc + tt_metal::NOC in0_noc = detail::GetPreferredNOCForDRAMWrite(device->arch()); + tt_metal::NOC in1_noc = detail::GetPreferredNOCForDRAMRead(device->arch()); + tt_metal::NOC in0_split_noc = detail::GetPreferredNOCForDRAMRead(device->arch()); + tt_metal::NOC in1_split_noc = detail::GetPreferredNOCForDRAMWrite(device->arch()); if (transpose_mcast) { std::swap(in0_sender, in1_sender); std::swap(in0_sender_in1_receiver, in0_receiver_in1_sender); std::swap(in0_end, in1_end); - // std::swap(in0_noc, in1_noc); - // std::swap(in0_split_noc, in1_split_noc); } if (in0_is_sharded) { in0_sender = all_cores; @@ -491,6 +491,9 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( } } } + if (in0_noc == NOC::NOC_1) { + std::swap(diff_start_coord, diff_end_coord); + } for(uint32_t core_idx_y = 0; core_idx_y < num_cores_r; ++core_idx_y) { for(uint32_t core_idx_x = 0; core_idx_x < num_cores_c; ++core_idx_x) { @@ -512,18 +515,27 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( uint32_t in1_idx = core_idx_x; auto in0_mcast_sender = left_core_physical; + auto in1_mcast_sender = top_core_physical; + + // Assuming in0 is NOC0 auto in0_mcast_start = left_core_plus_one_physical; auto in0_mcast_end = right_core_physical; - auto in1_mcast_sender = top_core_physical; + if (in0_noc == NOC::NOC_1) { + std::swap(in0_mcast_start, in0_mcast_end); + } + + // Assuming in1 is NOC1 auto in1_mcast_start = bottom_core_physical; auto in1_mcast_end = top_core_plus_one_physical; + if (in1_noc == NOC::NOC_0) { + std::swap(in1_mcast_start, in1_mcast_end); + } + if (transpose_mcast) { std::swap(in0_idx, in1_idx); std::swap(in0_mcast_sender, in1_mcast_sender); - std::swap(in0_mcast_start, in1_mcast_start); - std::swap(in0_mcast_end, in1_mcast_end); - std::swap(in0_mcast_start, in0_mcast_end); - std::swap(in1_mcast_start, in1_mcast_end); + std::swap(in0_mcast_start, in1_mcast_end); + std::swap(in0_mcast_end, in1_mcast_start); } // in0 sender diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp index d9788814e2c..ba2662b41d2 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp @@ -8,6 +8,7 @@ #include "tt_metal/common/constants.hpp" #include "hostdevcommon/common_values.hpp" #include "tt_metal/detail/util.hpp" +#include "tt_metal/detail/tt_metal.hpp" using namespace tt::constants; using namespace tt; @@ -89,37 +90,37 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in0_sender_in1_sender.cpp", in0_sender_in1_sender, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = detail::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = reader_compile_time_args}); auto mm_reader_kernel_in0_sender_in1_receiver_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in0_sender_in1_receiver.cpp", in0_sender_in1_receiver, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = detail::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = reader_compile_time_args}); auto mm_reader_kernel_in0_receiver_in1_sender_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in0_receiver_in1_sender.cpp", in0_receiver_in1_sender, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = detail::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args}); auto mm_reader_kernel_in0_receiver_in1_receiver_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in0_receiver_in1_receiver.cpp", in0_receiver_in1_receiver, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = detail::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args}); auto unary_writer_kernel_noc0_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp", all_except_left_column, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = detail::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args}); auto unary_writer_kernel_noc1_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp", left_column, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = writer_compile_time_args}); + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = detail::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = writer_compile_time_args}); // Compute kernel compile time args uint32_t num_blocks = (K/in0_block_w); @@ -396,19 +397,19 @@ operation::ProgramWithCallbacks create_program_mcast_in0( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in0_mcast_sender.cpp", mcast_senders, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto mm_reader_kernel_receiver_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in0_mcast_receiver.cpp", mcast_receivers, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); uint32_t num_blocks = (K/in0_block_w); @@ -655,19 +656,19 @@ operation::ProgramWithCallbacks create_program_mcast_in1( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in1_mcast_sender.cpp", mcast_senders, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto mm_reader_kernel_receiver_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_in1_mcast_receiver.cpp", mcast_receivers, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // Compute kernel compile time args uint32_t num_blocks = (K/in0_block_w); diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp index b0cb121810f..30e32187aeb 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp @@ -9,6 +9,7 @@ #include "tt_metal/host_api.hpp" #include "tt_metal/common/constants.hpp" #include "tt_metal/detail/util.hpp" +#include "tt_metal/detail/tt_metal.hpp" using namespace tt::constants; using namespace tt; @@ -155,14 +156,14 @@ operation::ProgramWithCallbacks create_program( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0.cpp", left_half, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_compile_time_args, .defines = mm_kernel_in0_reader_defines} + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = detail::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = reader_compile_time_args, .defines = mm_kernel_in0_reader_defines} ); auto mm_kernel_in1_reader_writer_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_writer_bmm_tile_layout_in1.cpp", left_half, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_writer_compile_time_args, .defines = mm_kernel_in1_reader_writer_defines} + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = detail::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_writer_compile_time_args, .defines = mm_kernel_in1_reader_writer_defines} ); // right half @@ -170,14 +171,14 @@ operation::ProgramWithCallbacks create_program( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0.cpp", right_half, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = mm_kernel_in0_reader_defines} + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = detail::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args, .defines = mm_kernel_in0_reader_defines} ); auto mm_kernel_in1_reader_writer_other_noc_setup_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_writer_bmm_tile_layout_in1.cpp", right_half, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_writer_compile_time_args, .defines = mm_kernel_in1_reader_writer_defines} + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = detail::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = reader_writer_compile_time_args, .defines = mm_kernel_in1_reader_writer_defines} ); vector compute_kernel_args_group_1 = { diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_padding/bmm_op_multi_core_reuse_padding.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_padding/bmm_op_multi_core_reuse_padding.cpp index 93b7970f441..db7a029a503 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_padding/bmm_op_multi_core_reuse_padding.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_padding/bmm_op_multi_core_reuse_padding.cpp @@ -125,13 +125,13 @@ operation::ProgramWithCallbacks create_program( program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout_padding.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout_padding.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // Create compute kernel auto mm_kernel_id = tt_metal::CreateKernel( diff --git a/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core.cpp b/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core.cpp index b58229f4247..7e6b64465c0 100644 --- a/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core.cpp @@ -72,13 +72,13 @@ operation::ProgramWithCallbacks matmul_single_core(const Tensor &a, const Tensor program, "tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank.cpp", core, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto writer_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_bmm_8bank.cpp", core, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args = { B, // B diff --git a/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core_tilize_untilize.cpp b/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core_tilize_untilize.cpp index d1b57fa9968..4a64760003f 100644 --- a/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core_tilize_untilize.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core_tilize_untilize.cpp @@ -420,10 +420,7 @@ operation::ProgramWithCallbacks bmm_single_core_tilize_untilize( program, // program reader_kernel, // file name core_range, // core - tt_metal::DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .defines = all_defines} + tt_metal::ReaderDataMovementConfig{.compile_args = {}, .defines = all_defines} ); // number of data elements along height of an in0 block @@ -472,9 +469,7 @@ operation::ProgramWithCallbacks bmm_single_core_tilize_untilize( program, // program writer_kernel, // file name core_range, // core - tt_metal::DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = writer_compile_time_args, .defines = all_defines} ); diff --git a/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp index 3c705e24448..5033a26a47f 100644 --- a/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp @@ -99,13 +99,13 @@ operation::ProgramWithCallbacks concat_multi_core(const std::vector &inp program, "tt_eager/tt_dnn/kernels/dataflow/reader_concat_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); for (uint32_t i = 0, num_tiles_written = 0; i < num_cores; i++){ CoreCoord core = {i / num_cores_y, i % num_cores_y}; diff --git a/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp b/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp index 5ec76ed4d76..367385c1fe0 100644 --- a/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp @@ -99,13 +99,13 @@ operation::ProgramWithCallbacks concat_single_core(const std::vector &in program, "tt_eager/tt_dnn/kernels/dataflow/reader_concat_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector writer_kernel_args = { dst_buffer->address(), diff --git a/tt_eager/tt_dnn/op_library/conv/conv_op.cpp b/tt_eager/tt_dnn/op_library/conv/conv_op.cpp index 09725cc9c24..99044650bd9 100644 --- a/tt_eager/tt_dnn/op_library/conv/conv_op.cpp +++ b/tt_eager/tt_dnn/op_library/conv/conv_op.cpp @@ -555,9 +555,7 @@ operation::ProgramWithCallbacks conv_as_large_bmm_single_core_(const Tensor& a, program, reader_kernel, core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, + ReaderDataMovementConfig{ .compile_args = reader_compile_time_args, .defines = reader_defines}); @@ -565,9 +563,7 @@ operation::ProgramWithCallbacks conv_as_large_bmm_single_core_(const Tensor& a, program, writer_kernel, core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, + WriterDataMovementConfig{ .compile_args = writer_compile_time_args, .defines = all_defines}); @@ -1252,13 +1248,13 @@ operation::ProgramWithCallbacks conv_as_large_bmm_with_address_map_single_core_( program, reader_kernel, core, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default}); + tt_metal::ReaderDataMovementConfig{}); std::vector writer_compile_time_args = {(uint32_t) (src0_dram_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0)}; auto writer_id = tt_metal::CreateKernel( program, writer_kernel, core, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_kernel_args = { act_block_w_ntiles, diff --git a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp index 4f3c78e8378..911b8fac4ca 100644 --- a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp +++ b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp @@ -726,8 +726,8 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_(const Tensor& a, cons bias_ntiles_per_core }; - auto writer_mcast_noc = NOC::NOC_0; - auto reader_noc = writer_mcast_noc == NOC::NOC_0 ? NOC::NOC_1 : NOC::NOC_0; + auto writer_mcast_noc = detail::GetPreferredNOCForDRAMWrite(device->arch()); + auto reader_noc = detail::GetPreferredNOCForDRAMRead(device->arch()); auto writer_mcast_sender_id = CreateKernel( program, writer_mcast_sender_kernel, diff --git a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp index 253acdf78d2..bf47ab96070 100644 --- a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp +++ b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp @@ -770,8 +770,8 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_(const Tensor& bias_ntiles_per_core }; - auto writer_mcast_noc = NOC::NOC_0; - auto reader_noc = writer_mcast_noc == NOC::NOC_0 ? NOC::NOC_1 : NOC::NOC_0; + auto writer_mcast_noc = detail::GetPreferredNOCForDRAMWrite(device->arch()); + auto reader_noc = detail::GetPreferredNOCForDRAMRead(device->arch()); auto writer_mcast_sender_id = CreateKernel( program, writer_mcast_sender_kernel, diff --git a/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp index cb1c4924d94..01e44e1c3d5 100644 --- a/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp @@ -91,13 +91,13 @@ operation::ProgramWithCallbacks copy_multi_core(const Tensor &input, const Tenso program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = kernel_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = kernel_defines}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args, .defines = kernel_defines}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args, .defines = kernel_defines}); /* If we need dataformat conversion, use compute kernel bool fp32_dest_acc_en = false; diff --git a/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp b/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp index 3a1d0115438..e42d39e4459 100644 --- a/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp @@ -88,13 +88,13 @@ operation::ProgramWithCallbacks copy_single_core(const Tensor &input, const Tens program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = kernel_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = kernel_defines}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args, .defines = kernel_defines}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args, .defines = kernel_defines}); /* If we need dataformat conversion, use compute kernel bool fp32_dest_acc_en = false; diff --git a/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp b/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp index cc0427e0e9d..bf0dadd8da4 100644 --- a/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp +++ b/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp @@ -519,7 +519,7 @@ operation::ProgramWithCallbacks downsample_single_core(const Tensor &a, std::arr program, "tt_eager/tt_dnn/op_library/downsample/kernels/downsample_writer_kernel.cpp", core_range, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args = { input_cb_index, diff --git a/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp index 7311174c8b5..1f6f184fc6e 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp @@ -136,13 +136,13 @@ operation::ProgramWithCallbacks eltwise_binary_multi_core(const Tensor &a, const program, "tt_eager/tt_dnn/kernels/dataflow/reader_binary_interleaved_start_id.cpp", all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = reader_defines}); KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, (block_sharded and not out_sharded) ? "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded_blocks_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_device_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args, .defines = writer_defines}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args, .defines = writer_defines}); auto eltwise_binary_kernel_id = tt_metal::CreateKernel( program, diff --git a/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp index d0dd885d0cb..ee2c31542eb 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp @@ -83,13 +83,13 @@ operation::ProgramWithCallbacks eltwise_binary_single_core(const Tensor &a, cons program, "tt_eager/tt_dnn/kernels/dataflow/reader_binary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_kernel_args = { }; diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp index de41302a09a..b9b97281c40 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp @@ -60,13 +60,13 @@ operation::ProgramWithCallbacks eltwise_unary_multi_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_kernel_args_group_1 = { num_tiles_per_core_group_1, // per_core_block_cnt diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp index c060a9d9e72..6957e23026b 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp @@ -54,13 +54,13 @@ operation::ProgramWithCallbacks eltwise_unary_single_core(const Tensor &a, Tenso program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args=reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_kernel_args = { num_tiles, // per_core_block_cnt diff --git a/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp b/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp index bc0e33873f0..3b83a4411fe 100644 --- a/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp +++ b/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp @@ -154,9 +154,7 @@ operation::ProgramWithCallbacks embeddings_tilized( program, "tt_eager/tt_dnn/op_library/embeddings/kernels/dataflow/embeddings_tilize.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = embedding_compile_time_args, .defines = embedding_defines}); @@ -191,9 +189,7 @@ operation::ProgramWithCallbacks embeddings_tilized( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = writer_compile_time_args}); uint32_t input_offset = 0; @@ -394,9 +390,7 @@ operation::ProgramWithCallbacks embeddings_rm( program, "tt_eager/tt_dnn/op_library/embeddings/kernels/dataflow/embeddings.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = embedding_compile_time_args, .defines = embedding_defines}); @@ -413,10 +407,7 @@ operation::ProgramWithCallbacks embeddings_rm( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); uint32_t input_offset = 0; uint32_t weight_offset = 0; diff --git a/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp b/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp index 8f503844582..5241e96bbe5 100644 --- a/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp +++ b/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp @@ -45,7 +45,7 @@ operation::ProgramWithCallbacks fill_rm_single_core(const Tensor& any, Tensor &o tt_metal::KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/fill_rm_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args=reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args=reader_compile_time_args}); tt_metal::SetRuntimeArgs( program, binary_reader_kernel_id, core, diff --git a/tt_eager/tt_dnn/op_library/layernorm/layernorm_op.cpp b/tt_eager/tt_dnn/op_library/layernorm/layernorm_op.cpp index 50ca928e2db..e58f950daa8 100644 --- a/tt_eager/tt_dnn/op_library/layernorm/layernorm_op.cpp +++ b/tt_eager/tt_dnn/op_library/layernorm/layernorm_op.cpp @@ -195,14 +195,14 @@ operation::ProgramWithCallbacks layernorm_( program, use_row_major_kernel ? "tt_eager/tt_dnn/op_library/layernorm/kernels/reader_unary_interleaved_ln_rm_gb.cpp" : "tt_eager/tt_dnn/op_library/layernorm/kernels/reader_unary_interleaved_ln.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines} + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = reader_defines} ); auto writer_kernels_id = CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id_blocked.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_compile_time_args} + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args} ); vector compute_args = { Wt, block_size, gamma.has_value(), beta.has_value() }; @@ -671,19 +671,19 @@ operation::ProgramWithCallbacks layernorm_sharded_( program, "tt_eager/tt_dnn/op_library/layernorm/kernels/dataflow/reader_mcast_sender_unary_sharded_ln.cpp", top_row, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_mcast_sender_compile_time_args, .defines = reader_mcast_sender_defines} + tt_metal::ReaderDataMovementConfig{.compile_args = reader_mcast_sender_compile_time_args, .defines = reader_mcast_sender_defines} ); auto reader_mcast_receiver_kernels_id_all_to_all = CreateKernel( program, "tt_eager/tt_dnn/op_library/layernorm/kernels/dataflow/reader_mcast_receiver_unary_sharded_ln.cpp", all_to_all_workers_except_top_row, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_mcast_receiver_all_to_all_compile_time_args, .defines = reader_mcast_receiver_defines} + tt_metal::ReaderDataMovementConfig{.compile_args = reader_mcast_receiver_all_to_all_compile_time_args, .defines = reader_mcast_receiver_defines} ); auto reader_mcast_receiver_kernels_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/layernorm/kernels/dataflow/reader_mcast_receiver_unary_sharded_ln.cpp", not_all_to_all_workers, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_mcast_receiver_compile_time_args, .defines = reader_mcast_receiver_defines} + tt_metal::ReaderDataMovementConfig{.compile_args = reader_mcast_receiver_compile_time_args, .defines = reader_mcast_receiver_defines} ); // writer defines std::map writer_defines; @@ -746,13 +746,13 @@ operation::ProgramWithCallbacks layernorm_sharded_( program, writer_kernel, all_to_all_cores, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_mcast_sender_compile_time_args, .defines = writer_defines} + tt_metal::WriterDataMovementConfig{.compile_args = writer_mcast_sender_compile_time_args, .defines = writer_defines} ); auto writer_mcast_receiver_kernels_id = CreateKernel( program, writer_kernel, not_all_to_all_workers, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_mcast_receiver_compile_time_args, .defines = writer_defines} + tt_metal::WriterDataMovementConfig{.compile_args = writer_mcast_receiver_compile_time_args, .defines = writer_defines} ); // defines std::map eltwise_binary_defines; diff --git a/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp b/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp index 962d81ca1f0..8396c761032 100644 --- a/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp @@ -93,9 +93,7 @@ KernelHandle CreateReadKernel( program, file_name, core_spec, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_1, - .noc = tt_metal::NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = compile_args, .defines = defines}); } @@ -110,9 +108,7 @@ KernelHandle CreateWriteKernel( program, file_name, core_spec, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_0, - .noc = tt_metal::NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = compile_args, .defines = defines}); } diff --git a/tt_eager/tt_dnn/op_library/nlp_tms/nlp_concat_heads.cpp b/tt_eager/tt_dnn/op_library/nlp_tms/nlp_concat_heads.cpp index 35f8912959a..9a8de8eaae5 100644 --- a/tt_eager/tt_dnn/op_library/nlp_tms/nlp_concat_heads.cpp +++ b/tt_eager/tt_dnn/op_library/nlp_tms/nlp_concat_heads.cpp @@ -90,12 +90,12 @@ operation::ProgramWithCallbacks multi_core_nlp_concat_heads(const Tensor &a, Ten program, "tt_eager/tt_dnn/op_library/nlp_tms/kernels/reader_tm_tile_layout_nlp_concat_heads_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = compile_time_args}); writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/nlp_tms/kernels/reader_tm_tile_layout_nlp_concat_heads_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = compile_time_args}); } else { std::vector reader_compile_time_args = { // interleaved accessor args @@ -114,13 +114,13 @@ operation::ProgramWithCallbacks multi_core_nlp_concat_heads(const Tensor &a, Ten program, "tt_eager/tt_dnn/op_library/nlp_tms/kernels/reader_tm_tile_layout_nlp_concat_heads.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); } diff --git a/tt_eager/tt_dnn/op_library/nlp_tms/nlp_create_qkv_heads.cpp b/tt_eager/tt_dnn/op_library/nlp_tms/nlp_create_qkv_heads.cpp index b927a77d443..879d7acb38d 100644 --- a/tt_eager/tt_dnn/op_library/nlp_tms/nlp_create_qkv_heads.cpp +++ b/tt_eager/tt_dnn/op_library/nlp_tms/nlp_create_qkv_heads.cpp @@ -94,12 +94,12 @@ operation::ProgramWithCallbacks multi_core_nlp_create_qkv_heads(const Tensor &a, program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_tm_tile_layout_nlp_create_qkv_heads.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // Create circular buffers diff --git a/tt_eager/tt_dnn/op_library/pad/pad_op.cpp b/tt_eager/tt_dnn/op_library/pad/pad_op.cpp index 7dcaab8d40f..1619251b42c 100644 --- a/tt_eager/tt_dnn/op_library/pad/pad_op.cpp +++ b/tt_eager/tt_dnn/op_library/pad/pad_op.cpp @@ -75,15 +75,11 @@ operation::ProgramWithCallbacks pad_rm_reader_writer(const Tensor &a, KernelHandle reader_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/reader_pad_dims_rm_interleaved.cpp", cores, - DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = reader_ct_args}); + ReaderDataMovementConfig{.compile_args = reader_ct_args}); KernelHandle writer_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/writer_pad_dims_rm_interleaved.cpp", cores, - DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); uint32_t padded_row_diff_size_nbytes = padded_row_size_nbytes - unpadded_row_size_nbytes; #if 0 @@ -221,9 +217,7 @@ operation::ProgramWithCallbacks pad_rm_opt(const Tensor &a, KernelHandle reader_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/pad_dims_rm_interleaved_opt.cpp", core, - DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = reader_ct_args}); + ReaderDataMovementConfig{.compile_args = reader_ct_args}); uint32_t padded_row_diff_size_nbytes = padded_row_size_nbytes - unpadded_row_size_nbytes; #if 0 @@ -353,7 +347,7 @@ operation::ProgramWithCallbacks pad_rm(const Tensor &a, Tensor &output, const Sh program, "tt_eager/tt_dnn/kernels/dataflow/pad_dims_rm_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = compile_time_args_vec}); + tt_metal::ReaderDataMovementConfig{.compile_args = compile_time_args_vec}); tt_metal::SetRuntimeArgs( program, @@ -468,13 +462,13 @@ operation::ProgramWithCallbacks pad_tile(const Tensor &a, Tensor& output, const program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_pad_dims_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); tt_metal::SetRuntimeArgs( program, diff --git a/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp index 600f2b207a7..d7b071abd8d 100644 --- a/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp @@ -194,15 +194,11 @@ operation::ProgramWithCallbacks pad_rm_reader_writer_multi_core(const Tensor &a, KernelHandle reader_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/reader_pad_dims_rm_interleaved.cpp", all_cores, - DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = reader_ct_args}); + ReaderDataMovementConfig{.compile_args = reader_ct_args}); KernelHandle writer_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/writer_pad_dims_rm_interleaved.cpp", all_cores, - DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); // int32_t padded_row_diff_size_nbytes = padded_row_size_nbytes - unpadded_row_size_nbytes; log_rt_args(CoreCoord{0, 0}, reader_ct_args); diff --git a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp index 92bd844d7e0..e88765d35dd 100644 --- a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp @@ -464,9 +464,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(const Tensor &inp 0, // right_in_stick_end, 0, // my_core }; - auto reader_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = reader_ct_args}; + auto reader_config = ReaderDataMovementConfig{.compile_args = reader_ct_args}; std::string reader_kernel_fname; if (input.memory_config().is_sharded()) { reader_kernel_fname = std::string("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp"); @@ -486,10 +484,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(const Tensor &inp writer_defines["SHARDED_OUT"] = "1"; } std::vector writer_ct_args = reader_ct_args; - auto writer_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = writer_ct_args, - .defines = writer_defines}; + auto writer_config = WriterDataMovementConfig{.compile_args = writer_ct_args, .defines = writer_defines}; std::string writer_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, @@ -866,9 +861,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core(const Tensor &input, Tens const_buffer_size * in_nbytes, (in_cb_page_nelems_padded * out_nelems * 2) >> 5 // TODO: generalize num rows to fill in in_cb }; - auto reader_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = reader_ct_args}; + auto reader_config = ReaderDataMovementConfig{.compile_args = reader_ct_args}; std::string reader_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_single_core.cpp"); auto reader_kernel = CreateKernel(program, reader_kernel_fname, @@ -879,9 +872,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core(const Tensor &input, Tens * Writer Kernel: output cb -> output rows */ std::vector writer_ct_args = reader_ct_args; - auto writer_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}; + auto writer_config = WriterDataMovementConfig{.compile_args = writer_ct_args}; std::string writer_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, @@ -1314,9 +1305,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T 0, // in_skip_after_each_full_row 0, // skip_after_each_stick }; - auto reader_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = reader_ct_args}; + auto reader_config = ReaderDataMovementConfig{.compile_args = reader_ct_args}; std::string reader_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp"); auto reader_kernel = CreateKernel(program, reader_kernel_fname, @@ -1329,10 +1318,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T std::map writer_defines; writer_defines["SHARDED_OUT"] = "1"; std::vector writer_ct_args = reader_ct_args; - auto writer_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = writer_ct_args, - .defines = writer_defines}; + auto writer_config = WriterDataMovementConfig{.compile_args = writer_ct_args, .defines = writer_defines}; std::string writer_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, diff --git a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp index 832ac54de65..e342bc7118f 100644 --- a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp +++ b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp @@ -472,9 +472,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T 0, // partial_bottom_image_nrows 0, // partial_last_row_nsticks }; - auto reader_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = reader_ct_args}; + auto reader_config = ReaderDataMovementConfig{.compile_args = reader_ct_args}; std::string reader_kernel_fname; if (input.memory_config().is_sharded()) { reader_kernel_fname = std::string("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp"); @@ -494,10 +492,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T writer_defines["SHARDED_OUT"] = "1"; } std::vector writer_ct_args = reader_ct_args; - auto writer_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = writer_ct_args, - .defines = writer_defines}; + auto writer_config = WriterDataMovementConfig{.compile_args = writer_ct_args, .defines = writer_defines}; std::string writer_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, diff --git a/tt_eager/tt_dnn/op_library/pool/max_pool_single_core.cpp b/tt_eager/tt_dnn/op_library/pool/max_pool_single_core.cpp index f89e154d59d..fe69643375e 100644 --- a/tt_eager/tt_dnn/op_library/pool/max_pool_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/pool/max_pool_single_core.cpp @@ -150,9 +150,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten const_buffer_size * in_nbytes, (in_cb_page_nelems_padded * out_nelems * 2) >> 5 // TODO: generalize num rows to fill in in_cb }; - auto reader_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = reader_ct_args}; + auto reader_config = ReaderDataMovementConfig{.compile_args = reader_ct_args}; std::string reader_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_single_core.cpp"); auto reader_kernel = CreateKernel(program, reader_kernel_fname, @@ -201,9 +199,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten */ std::vector writer_ct_args = reader_ct_args; std::vector writer_rt_args = reader_rt_args; - auto writer_config = DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}; + auto writer_config = WriterDataMovementConfig{.compile_args = writer_ct_args}; std::string writer_kernel_fname("tt_eager/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, diff --git a/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp b/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp index 5f75cbbad05..577f94c8386 100644 --- a/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp +++ b/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp @@ -114,7 +114,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out program, "tt_eager/tt_dnn/op_library/reduce/kernels/dataflow/reader_unary_transpose_wh_interleaved_input_cols_partitioned_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = reader_defines}); } else { bool src0_is_dram = src0_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; std::vector reader_compile_time_args = { @@ -131,7 +131,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out program, "tt_eager/tt_dnn/op_library/reduce/kernels/dataflow/reader_unary_transpose_wh_interleaved_input_cols_partitioned.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = reader_defines}); } tt_metal::Buffer *dst_buffer = output.buffer(); @@ -145,10 +145,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); } else { bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; std::vector writer_compile_time_args = { @@ -160,7 +157,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); } std::map reduce_defines = reduce_op_utils::get_defines(reduce_op, reduce_dim); vector compute_kernel_args_group_1 = { diff --git a/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp b/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp index 9b64a03bca9..e773f99d0c1 100644 --- a/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp +++ b/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp @@ -81,13 +81,13 @@ operation::ProgramWithCallbacks reduce_multi_core_w(const Tensor &a, Tensor& out program, "tt_eager/tt_dnn/op_library/reduce/kernels/dataflow/reader_unary_reduce_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); std::map reduce_defines = reduce_op_utils::get_defines(reduce_op, reduce_dim); vector compute_kernel_args_group_1 = { diff --git a/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp b/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp index 3db93bb6f95..50e8056a531 100644 --- a/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp @@ -90,13 +90,13 @@ operation::ProgramWithCallbacks reduce_single_core(const Tensor &a, Tensor& outp "tt_eager/tt_dnn/kernels/dataflow/reader_unary_transpose_wh_interleaved.cpp" : "tt_eager/tt_dnn/op_library/reduce/kernels/dataflow/reader_unary_reduce_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines = reader_defines}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_kernel_args = { Ht, // Ht diff --git a/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp b/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp index 0b643e1212d..04c1e7453e4 100644 --- a/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp +++ b/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp @@ -61,13 +61,13 @@ operation::ProgramWithCallbacks reshape_tile_single_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/kernels/dataflow/reshape_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); tt_metal::SetRuntimeArgs( program, @@ -204,13 +204,13 @@ operation::ProgramWithCallbacks reshape_rm_single_core(const Tensor &a, Tensor& program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_reshape_stick_layout_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_reshape_stick_layout_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // No compute required, so using blank kernel vector compute_args = { diff --git a/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp index 4fbf12a2709..087d28779a4 100644 --- a/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp @@ -181,13 +181,13 @@ operation::ProgramWithCallbacks rotary_embedding_multi_core(const Tensor &input, program, "tt_eager/tt_dnn/kernels/dataflow/reader_rotary_embedding_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines=kernel_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines=kernel_defines}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_rotary_embedding_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args, .defines=kernel_defines}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args, .defines=kernel_defines}); vector compute_kernel_args = { (std::uint32_t)input_cb_index, diff --git a/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp b/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp index 3616d69a4ba..8b1b113286a 100644 --- a/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp @@ -182,13 +182,13 @@ operation::ProgramWithCallbacks rotary_embedding_single_core(const Tensor &input program, "tt_eager/tt_dnn/kernels/dataflow/reader_rotary_embedding_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines=kernel_defines}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args, .defines=kernel_defines}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_rotary_embedding_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args, .defines=kernel_defines}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args, .defines=kernel_defines}); vector compute_kernel_args = { (std::uint32_t)input_cb_index, diff --git a/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp b/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp index 97c39853d1a..93806a28d3a 100644 --- a/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp @@ -82,13 +82,13 @@ operation::ProgramWithCallbacks rotate_half_single_core(const Tensor &input, Ten program, "tt_eager/tt_dnn/kernels/dataflow/reader_rotate_half_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_rotate_half_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); std::map bcast_compute_defines = { {"BCAST_OP", "mul_tiles_bcast"}, diff --git a/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp index fd192e21629..252489b80bd 100644 --- a/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp @@ -106,9 +106,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded_blocks_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_1, - .noc = tt_metal::NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = reader_compile_time_args}); } else { bool src_stick_size_is_power_of_two = is_power_of_two_at_least_32(num_units_per_row); @@ -123,9 +121,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_1, - .noc = tt_metal::NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = reader_compile_time_args}); } @@ -134,9 +130,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_0, - .noc = tt_metal::NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = writer_compile_time_args}); if (convert_df) { @@ -354,9 +348,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_1, - .noc = tt_metal::NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = reader_compile_time_args}); bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; @@ -369,9 +361,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded_blocks_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_0, - .noc = tt_metal::NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = writer_compile_time_args}); } else { bool dst_stick_size_is_power_of_two = is_power_of_two_at_least_32(num_units_per_row); @@ -387,9 +377,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core( "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/" "writer_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_0, - .noc = tt_metal::NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = writer_compile_time_args}); } if (convert_df) { diff --git a/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp b/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp index e4fb1f90054..a1eee59e95b 100644 --- a/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp +++ b/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp @@ -109,22 +109,16 @@ operation::ProgramWithCallbacks scale_mask_softmax_(const Tensor &input_tensor, } auto reader_kernels_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/softmax/kernels/reader_unary_interleaved_sm.cpp", all_device_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_1, - .noc = tt_metal::NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = reader_compile_time_args, .defines = softmax_defines }); - //DataMovementProcessor::RISCV_1, core.x < 6 ? NOC::RISCV_1_default : NOC::RISCV_0_default); auto writer_kernels_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/softmax/kernels/writer_unary_interleaved_start_id_blocked_sm.cpp", all_device_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_0, - .noc = tt_metal::NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = writer_compile_time_args }); - //DataMovementProcessor::RISCV_0, core.x < 6 ? NOC::RISCV_0_default : NOC::RISCV_1_default); // for broadcasting in H direction we need to // NCHt, Nt, Wt @@ -436,9 +430,7 @@ operation::ProgramWithCallbacks scale_mask_softmax_sharded_( program, use_row_major_kernel ? "tt_eager/tt_dnn/op_library/softmax/kernels/dataflow/reader_unary_sharded_sm_rm_mask.cpp" : "tt_eager/tt_dnn/op_library/softmax/kernels/dataflow/reader_unary_sharded_sm.cpp", all_device_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_1, - .noc = tt_metal::NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = reader_compile_time_args, .defines = softmax_defines }); diff --git a/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp b/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp index 8e64ce8db93..311e014e1d4 100644 --- a/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp +++ b/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp @@ -198,18 +198,14 @@ operation::ProgramWithCallbacks split_last_dim_two_chunks_tiled( program, "tt_eager/tt_dnn/kernels/dataflow/reader_tm_tile_layout_split_two_chunks.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_1, - .noc = tt_metal::NOC::RISCV_1_default, + tt_metal::ReaderDataMovementConfig{ .compile_args = reader_compile_time_args}); auto writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_tm_tile_layout_split_two_chunks.cpp", all_cores, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_0, - .noc = tt_metal::NOC::RISCV_0_default, + tt_metal::WriterDataMovementConfig{ .compile_args = writer_compile_time_args}); // Dummy compute kernel diff --git a/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp index 21eb0eea5b8..8f1db1182d6 100644 --- a/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp @@ -201,9 +201,7 @@ operation::ProgramWithCallbacks tilize_multi_core_interleaved(const Tensor &a, T program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_split_rows_interleaved.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, + ReaderDataMovementConfig{ .compile_args = reader_ct_args}); /** writer @@ -217,9 +215,7 @@ operation::ProgramWithCallbacks tilize_multi_core_interleaved(const Tensor &a, T program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, + WriterDataMovementConfig{ .compile_args = writer_ct_args}); /** compute @@ -419,13 +415,13 @@ operation::ProgramWithCallbacks tilize_multi_core_sharded(const Tensor &input, T program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args = { uint32_t(num_tiles_per_shard / num_tiles_per_row), @@ -569,7 +565,7 @@ operation::ProgramWithCallbacks tilize_with_val_padding_multi_core(const Tensor program, "tt_eager/tt_dnn/op_library/tilize/kernels/dataflow/reader_unary_pad_height_width_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_ct_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_ct_args}); /** writer */ @@ -582,9 +578,7 @@ operation::ProgramWithCallbacks tilize_with_val_padding_multi_core(const Tensor program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, + WriterDataMovementConfig{ .compile_args = writer_ct_args}); /** compute diff --git a/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp b/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp index 24e8c854901..39e865daa50 100644 --- a/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp @@ -115,14 +115,14 @@ operation::ProgramWithCallbacks tilize_single_core(const Tensor &a, Tensor& outp program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_split_rows_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); // Tilized writer tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args = { uint32_t(num_tiles / num_tiles_per_block), // per_core_block_cnt @@ -318,14 +318,14 @@ operation::ProgramWithCallbacks tilize_with_val_padding_single_core(const Tensor program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_pad_dims_split_rows.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); // Tilized writer tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_kernel_args = { uint32_t(num_tiles / num_tiles_per_block), diff --git a/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_attn_matmul/multi_core_attn_matmul.cpp b/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_attn_matmul/multi_core_attn_matmul.cpp index 5f3f820594a..03137e7293f 100644 --- a/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_attn_matmul/multi_core_attn_matmul.cpp +++ b/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_attn_matmul/multi_core_attn_matmul.cpp @@ -125,13 +125,13 @@ operation::ProgramWithCallbacks multi_core_attn_matmul(const Tensor &a, const Te program, "tt_eager/tt_dnn/kernels/dataflow/reader_transformer_attn_matmul.cpp", all_device_cores, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto writer_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_device_cores, - tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args = { (uint32_t) transpose_hw_bool, // transpose_hw for matmul_init diff --git a/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_concatenate_heads/multi_core_concatenate_heads.cpp b/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_concatenate_heads/multi_core_concatenate_heads.cpp index 56a5b85f342..4a7dd81d9da 100644 --- a/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_concatenate_heads/multi_core_concatenate_heads.cpp +++ b/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_concatenate_heads/multi_core_concatenate_heads.cpp @@ -100,12 +100,12 @@ operation::ProgramWithCallbacks multi_core_concat_heads(const Tensor &a, Tensor& program, "tt_eager/tt_dnn/kernels/dataflow/reader_tm_tile_layout_concat_heads.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_tm_tile_layout_concat_heads.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // Dummy compute kernel std::vector compute_args = {0}; // dummy diff --git a/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_split_fused_qkv_and_split_heads/multi_core_split_fused_qkv_and_split_heads.cpp b/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_split_fused_qkv_and_split_heads/multi_core_split_fused_qkv_and_split_heads.cpp index 02f9f6f82a6..deb6d9efdde 100644 --- a/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_split_fused_qkv_and_split_heads/multi_core_split_fused_qkv_and_split_heads.cpp +++ b/tt_eager/tt_dnn/op_library/transformer_tms/multi_core_split_fused_qkv_and_split_heads/multi_core_split_fused_qkv_and_split_heads.cpp @@ -119,12 +119,12 @@ operation::ProgramWithCallbacks multi_core_split_fused_qkv_and_split_heads(const program, "tt_eager/tt_dnn/kernels/dataflow/reader_tm_tile_layout_create_qkv_heads.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); auto writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_tm_tile_layout_create_qkv_heads.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // Dummy compute kernel std::vector compute_args = {num_tiles_per_tensor}; @@ -293,7 +293,7 @@ operation::ProgramWithCallbacks multi_core_split_fused_qkv_and_split_heads_shard program, "tt_eager/tt_dnn/op_library/transformer_tms/dataflow/reader_tm_tile_layout_create_qkv_heads_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); // writer std::vector writer_compile_time_args = { (std::uint32_t) num_heads_per_tensor, @@ -309,7 +309,7 @@ operation::ProgramWithCallbacks multi_core_split_fused_qkv_and_split_heads_shard program, "tt_eager/tt_dnn/op_library/transformer_tms/dataflow/writer_tm_tile_layout_create_qkv_heads_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); // compute kernel std::vector compute_args = {num_tiles_per_tensor}; auto compute_kernel_id = tt_metal::CreateKernel( diff --git a/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp index 62116c806e5..8629cf53c68 100644 --- a/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp @@ -140,13 +140,13 @@ operation::ProgramWithCallbacks transpose_cn_single_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_cn_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); auto all_runtime_args = std::move(get_runtime_args_mc_cn(a, output, num_cores_total, num_cores, num_cores_y, core_group_1, num_tiles_per_core_group_1, core_group_2, num_tiles_per_core_group_2)); diff --git a/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp index bebe91515e5..4567ed5754c 100644 --- a/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp @@ -151,13 +151,13 @@ operation::ProgramWithCallbacks transpose_hc_multi_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); auto all_runtime_args = get_runtime_args_mc_hc(a, output, num_cores_total, num_cores, num_cores_y, core_group_1, num_tiles_per_core_group_1, core_group_2, num_tiles_per_core_group_2); diff --git a/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp b/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp index 122cc4d25c9..fd6b3a35370 100644 --- a/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp @@ -88,13 +88,13 @@ operation::ProgramWithCallbacks transpose_wh_single_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_transpose_wh_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); @@ -239,13 +239,13 @@ operation::ProgramWithCallbacks transpose_hc_single_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); auto all_runtime_args = get_runtime_args_hc(a, output); @@ -354,13 +354,13 @@ operation::ProgramWithCallbacks transpose_cn_single_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_cn_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); auto all_runtime_args = get_runtime_args_cn(a, output); diff --git a/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp index ab963afef14..b7cab777fc7 100644 --- a/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp @@ -148,13 +148,13 @@ operation::ProgramWithCallbacks transpose_wh_multi_core(const Tensor &a, Tensor program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_wh_interleaved_start_id.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); auto compute_kernel_id = tt_metal::CreateKernel( diff --git a/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp index 5a881cb91a8..a15b7b00749 100644 --- a/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp @@ -175,13 +175,13 @@ operation::ProgramWithCallbacks unpad_rm_multi_core(const Tensor &a, Tensor& out program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_rm_interleaved_start_id.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args_vec}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args_vec}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args_vec}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args_vec}); auto all_runtime_args = get_unpad_runtime_args_rm(a, output, num_cores_total, num_cores, num_cores_y, core_group_1, core_group_2, num_sticks_per_core_group_1, num_sticks_per_core_group_2); @@ -397,13 +397,13 @@ operation::ProgramWithCallbacks unpad_tile_multi_core(const Tensor &a, Tensor& o program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_interleaved_start_id.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", total_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); diff --git a/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp b/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp index d7c9ed31e8a..aefeb9bd134 100644 --- a/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp @@ -130,13 +130,13 @@ operation::ProgramWithCallbacks unpad_rm_single_core(const Tensor &a, Tensor& ou program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_rm_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args_vec}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args_vec}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args_vec}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args_vec}); tt_metal::SetRuntimeArgs( @@ -285,13 +285,13 @@ operation::ProgramWithCallbacks unpad_tile_single_core(const Tensor &a, Tensor& program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); diff --git a/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp index 696c97691f8..2bf65c996d8 100644 --- a/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp @@ -244,7 +244,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_ct_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_ct_args}); } else { bool src0_is_dram = src0_buffer->buffer_type() == BufferType::DRAM ? 1 : 0; vector reader_ct_args = { @@ -255,9 +255,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, + ReaderDataMovementConfig{ .compile_args = reader_ct_args}); } @@ -272,7 +270,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_ct_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_ct_args}); } else { bool out_is_dram = dst_buffer->buffer_type() == BufferType::DRAM ? 1 : 0; if (src_block_sharded) { @@ -283,10 +281,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_8bank_blocks.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); } else { bool stick_size_is_power_of_two = is_power_of_two_at_least_32(block_size_nbytes); uint32_t log2_stick_size = stick_size_is_power_of_two ? (std::uint32_t) std::log2(block_size_nbytes) : 0; @@ -300,10 +295,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_split_rows_interleaved.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); } } @@ -673,7 +665,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_ct_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_ct_args}); /** writer */ @@ -687,10 +679,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor program, "tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_unpad_batch_rows_sharded.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); } else { bool out_is_dram = dst_buffer->buffer_type() == BufferType::DRAM ? 1 : 0; vector writer_ct_args = { @@ -700,10 +689,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_8bank_blocks.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); } /** compute diff --git a/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp b/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp index 382e4112b2a..270028fe913 100644 --- a/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp @@ -108,14 +108,14 @@ operation::ProgramWithCallbacks untilize_single_core(const Tensor &a, Tensor& ou program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); // Untilized writer tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_split_rows_interleaved.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args = { uint32_t(num_tiles / num_tiles_per_block), // per_core_block_cnt @@ -287,14 +287,14 @@ operation::ProgramWithCallbacks untilize_with_unpadding_single_core(const Tensor program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); // Untilized writer tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_unpad_dims_split_rows.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_args = { uint32_t(num_tiles / num_tiles_per_block), diff --git a/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp b/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp index bc58ef916b7..a765956deab 100644 --- a/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp @@ -265,10 +265,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = reader_ct_args}); + ReaderDataMovementConfig{.compile_args = reader_ct_args}); /** writer */ @@ -288,10 +285,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i program, "tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo_s2.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); /** compute */ @@ -903,10 +897,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default, - .compile_args = reader_ct_args}); + ReaderDataMovementConfig{.compile_args = reader_ct_args}); /** writer */ @@ -926,10 +917,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a program, "tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo.cpp", all_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default, - .compile_args = writer_ct_args}); + WriterDataMovementConfig{.compile_args = writer_ct_args}); /** compute */ diff --git a/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp index 3ea86119fa9..bb8804f326a 100644 --- a/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp @@ -59,13 +59,13 @@ operation::ProgramWithCallbacks fill_cache_multi_core(const Tensor& cache_tensor program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); for (uint32_t i = 0, num_tiles_written = 0; i < num_cores; i++){ CoreCoord core = {i / num_cores_y, i % num_cores_y}; diff --git a/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp b/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp index 27f4a076a37..2ce137a556a 100644 --- a/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp @@ -104,13 +104,13 @@ operation::ProgramWithCallbacks update_cache_single_core(const Tensor& cache_ten program, "tt_eager/tt_dnn/kernels/dataflow/reader_update_cache_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_update_cache_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); vector compute_kernel_args = { src0_cb_index, @@ -234,13 +234,13 @@ operation::ProgramWithCallbacks fill_cache_single_core(const Tensor& cache_tenso program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); + tt_metal::ReaderDataMovementConfig{.compile_args = reader_compile_time_args}); tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, - tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args}); + tt_metal::WriterDataMovementConfig{.compile_args = writer_compile_time_args}); SetRuntimeArgs( program, diff --git a/tt_metal/detail/util.hpp b/tt_metal/detail/util.hpp index e5ce1a424d9..7e0723653bb 100644 --- a/tt_metal/detail/util.hpp +++ b/tt_metal/detail/util.hpp @@ -5,6 +5,7 @@ #pragma once #include "tt_metal/common/tt_backend_api_types.hpp" #include "tt_metal/common/math.hpp" +#include "tt_metal/impl/kernels/data_types.hpp" namespace tt::tt_metal::detail{ @@ -29,4 +30,26 @@ namespace tt::tt_metal::detail{ return num_equally_distributed_pages * round_up(page_size_bytes, ADDRESS_ALIGNMENT); } + inline NOC GetPreferredNOCForDRAMRead(ARCH arch) { + switch (arch) { + case ARCH::GRAYSKULL: + return NOC::NOC_1; + case ARCH::WORMHOLE_B0: + default: + return NOC::NOC_0; + + } + } + + inline NOC GetPreferredNOCForDRAMWrite(ARCH arch) { + switch (arch) { + case ARCH::GRAYSKULL: + return NOC::NOC_0; + case ARCH::WORMHOLE_B0: + default: + return NOC::NOC_1; + + } + } + } diff --git a/tt_metal/host_api.hpp b/tt_metal/host_api.hpp index 01669c4a1ae..4db3c6223ed 100644 --- a/tt_metal/host_api.hpp +++ b/tt_metal/host_api.hpp @@ -211,7 +211,6 @@ void DeallocateBuffer(Buffer &buffer); // ================================================== // COMPILE & EXECUTE KENRNELS -// // ================================================== /** diff --git a/tt_metal/impl/kernels/data_types.hpp b/tt_metal/impl/kernels/data_types.hpp new file mode 100644 index 00000000000..5f8009532f3 --- /dev/null +++ b/tt_metal/impl/kernels/data_types.hpp @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +namespace tt::tt_metal { + +enum class DataMovementProcessor { + RISCV_0 = 0, // BRISC + RISCV_1 = 1, // NCRISC +}; + +enum NOC : uint8_t { + RISCV_0_default = 0, + RISCV_1_default = 1, + NOC_0 = 0, + NOC_1 = 1, +}; + +enum Eth : uint8_t { + SENDER = 0, + RECEIVER = 1, +}; + +} // namespace tt::tt_metal diff --git a/tt_metal/impl/kernels/kernel_types.hpp b/tt_metal/impl/kernels/kernel_types.hpp index 8b702eca750..11b831c4a7c 100644 --- a/tt_metal/impl/kernels/kernel_types.hpp +++ b/tt_metal/impl/kernels/kernel_types.hpp @@ -5,6 +5,8 @@ #pragma once #include "common/base_types.hpp" +#include "tt_metal/impl/kernels/data_types.hpp" +#include "tt_metal/detail/util.hpp" #include #include #include @@ -13,23 +15,6 @@ namespace tt::tt_metal { using KernelHandle = std::uint16_t; -enum class DataMovementProcessor { - RISCV_0 = 0, // BRISC - RISCV_1 = 1, // NCRISC -}; - -enum NOC : uint8_t { - RISCV_0_default = 0, - RISCV_1_default = 1, - NOC_0 = 0, - NOC_1 = 1, -}; - -enum Eth : uint8_t { - SENDER = 0, - RECEIVER = 1, -}; - struct DataMovementConfig { DataMovementProcessor processor = DataMovementProcessor::RISCV_0; // For data transfer kernels: NCRISC & BRISC NOC noc = NOC::RISCV_0_default; @@ -40,6 +25,24 @@ struct DataMovementConfig { std::map defines; }; +struct ReaderDataMovementConfig : public DataMovementConfig { + ReaderDataMovementConfig(std::vector compile_args = {}, std::map defines = {}) : + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_1, + .noc = detail::GetPreferredNOCForDRAMRead(tt::Cluster::instance().arch()), + .compile_args = compile_args, + .defines = defines} {} +}; + +struct WriterDataMovementConfig : public DataMovementConfig { + WriterDataMovementConfig(std::vector compile_args = {}, std::map defines = {}) : + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_0, + .noc = detail::GetPreferredNOCForDRAMWrite(tt::Cluster::instance().arch()), + .compile_args = compile_args, + .defines = defines} {} +}; + struct ComputeConfig { MathFidelity math_fidelity = MathFidelity::HiFi4; bool fp32_dest_acc_en = false; diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp index 164ced53fba..85943cbc7b5 100644 --- a/tt_metal/tt_metal.cpp +++ b/tt_metal/tt_metal.cpp @@ -490,8 +490,6 @@ std::vector & GetRuntimeArgs(const Program &program, KernelHandle kern return detail::GetKernel(program, kernel_id)->runtime_args(logical_core); } - - } // namespace tt_metal } // namespace tt