From 882fe5f98dd61bd9aff700d38606d523d202ade9 Mon Sep 17 00:00:00 2001 From: Bill Teng Date: Tue, 21 Nov 2023 23:12:54 +0000 Subject: [PATCH] #3939: rename KernelID -> KernelHandle and CircularBufferID -> CBHandle --- .../host_apis/runtime_args/runtime_args.rst | 4 +- .../tt_metal/examples/dram_loopback.rst | 2 +- .../tt_metal/examples/eltwise_binary.rst | 8 ++-- .../source/tt_metal/examples/eltwise_sfpu.rst | 6 +-- .../matmul/matmul_global_l1.cpp | 8 ++-- tests/tt_metal/tt_metal/test_add_two_ints.cpp | 2 +- tests/tt_metal/tt_metal/test_compile_args.cpp | 4 +- .../test_datacopy_multi_core_multi_dram.cpp | 6 +-- .../test_matmul_multi_core_multi_dram.cpp | 6 +-- ...matmul_multi_core_multi_dram_in0_mcast.cpp | 8 ++-- ...ti_core_multi_dram_in0_mcast_in1_mcast.cpp | 14 +++---- ...matmul_multi_core_multi_dram_in1_mcast.cpp | 8 ++-- .../test_matmul_multi_core_single_dram.cpp | 2 +- .../tt_metal/test_multi_core_kernel.cpp | 12 +++--- .../tt_metal/test_multiple_programs.cpp | 8 ++-- .../tt_metal/unit_tests/basic/device.cpp | 2 +- .../test_CircularBuffer_allocation.cpp | 8 ++-- .../command_queue/test_EnqueueProgram.cpp | 2 +- .../dprint/test_print_all_harts.cpp | 6 +-- .../dprint/test_raise_wait.cpp | 4 +- .../pipelining/basic_pipeline.cpp | 4 +- .../multi_core_h/bcast_op_multi_core_h.cpp | 4 +- .../multi_core_hw/bcast_op_multi_core_hw.cpp | 4 +- .../multi_core_w/bcast_op_multi_core_w.cpp | 4 +- .../single_core/bcast_op_single_core.cpp | 4 +- ...op_multi_core_reuse_mcast_1d_optimized.cpp | 12 +++--- ...op_multi_core_reuse_mcast_2d_optimized.cpp | 12 +++--- ..._op_multi_core_reuse_mcast_generalized.cpp | 12 +++--- .../bmm_op_multi_core_reuse_optimized.cpp | 4 +- .../multi_core/concat_op_multi_core.cpp | 4 +- .../single_core/concat_op_single_core.cpp | 4 +- .../optimized_conv_op.cpp | 14 +++---- .../optimized_conv_op_sharded.cpp | 12 +++--- .../copy/multi_core/copy_op_multi_core.cpp | 4 +- .../copy/single_core/copy_op_single_core.cpp | 4 +- .../op_library/downsample/downsample_op.cpp | 2 +- .../eltwise_binary_op_multi_core.cpp | 4 +- .../eltwise_binary_op_single_core.cpp | 4 +- .../eltwise_unary_op_multi_core.cpp | 4 +- .../eltwise_unary_op_single_core.cpp | 4 +- .../op_library/embeddings/embeddings_op.cpp | 2 +- .../tt_dnn/op_library/fill_rm/fill_rm_op.cpp | 2 +- .../op_library/moreh_helper_functions.cpp | 24 +++++------ .../op_library/moreh_helper_functions.hpp | 12 +++--- .../multi_core/moreh_matmul_op_multi_core.cpp | 2 +- .../sum/moreh_sum_multi_core.cpp | 2 +- .../multi_core/move_op_multi_core_overlap.cpp | 2 +- tt_eager/tt_dnn/op_library/pad/pad_op.cpp | 12 +++--- .../op_library/pad/pad_op_multi_core.cpp | 4 +- .../op_library/pool/max_pool_multi_core.cpp | 6 +-- .../max_pool_multi_core_sharded_with_halo.cpp | 2 +- .../multi_core_h/reduce_op_multi_core_h.cpp | 10 ++--- .../multi_core_w/reduce_op_multi_core_w.cpp | 4 +- .../single_core/reduce_op_single_core.cpp | 4 +- .../tt_dnn/op_library/reshape/reshape_op.cpp | 8 ++-- .../rotary_embedding_op_multi_core.cpp | 4 +- .../rotary_embedding_op_single_core.cpp | 4 +- .../rotate_half_op_single_core.cpp | 4 +- .../multi_core/sharded_op_multi_core.cpp | 8 ++-- .../tt_dnn/op_library/softmax/softmax_op.cpp | 6 +-- .../split/split_last_dim_two_chunks_tiled.cpp | 4 +- .../tilize_op_multi_core.cpp | 12 +++--- .../tilize_op_single_core.cpp | 8 ++-- .../cn_multi_core/transpose_cn_multi_core.cpp | 4 +- .../transpose_hc_op_multi_core.cpp | 4 +- .../single_core/transpose_op_single_core.cpp | 12 +++--- .../transpose_wh_op_multi_core.cpp | 4 +- .../unpad/multi_core/unpad_op_multi_core.cpp | 8 ++-- .../single_core/unpad_op_single_core.cpp | 8 ++-- .../multi_core/untilize_op_multi_core.cpp | 10 ++--- .../single_core/untilize_op_single_core.cpp | 8 ++-- .../untilize/untilize_with_halo_op.cpp | 12 +++--- .../multi_core/update_cache_op_multi_core.cpp | 4 +- .../update_cache_op_single_core.cpp | 8 ++-- tt_metal/detail/program.hpp | 6 +-- tt_metal/detail/tt_metal.hpp | 2 +- tt_metal/host_api.hpp | 32 +++++++-------- tt_metal/impl/buffers/circular_buffer.hpp | 2 +- .../impl/buffers/circular_buffer_types.hpp | 2 +- tt_metal/impl/dispatch/command_queue.cpp | 8 ++-- tt_metal/impl/kernels/kernel_types.hpp | 2 +- tt_metal/impl/program/program.cpp | 34 ++++++++-------- tt_metal/impl/program/program.hpp | 40 +++++++++---------- .../eltwise_binary/eltwise_binary.cpp | 12 +++--- .../eltwise_sfpu/eltwise_sfpu.cpp | 10 ++--- .../loopback/loopback.cpp | 2 +- .../matmul_multicore_reuse_mcast.cpp | 4 +- .../test_custom_cycle_count.cpp | 6 +-- .../test_full_buffer/test_full_buffer.cpp | 6 +-- tt_metal/tt_metal.cpp | 24 +++++------ 90 files changed, 330 insertions(+), 330 deletions(-) diff --git a/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst b/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst index 89427579ae3..7578c5964e9 100644 --- a/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst +++ b/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst @@ -1,8 +1,8 @@ Runtime Arguments ================== -.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelID kernel_id, const std::variant &logical_core, const std::vector &runtime_args) +.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const std::variant &logical_core, const std::vector &runtime_args) -.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector > &runtime_args) +.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector > &runtime_args) .. doxygenfunction:: GetRuntimeArgs diff --git a/docs/source/tt_metal/examples/dram_loopback.rst b/docs/source/tt_metal/examples/dram_loopback.rst index a1b49adfd9d..68532cc1668 100644 --- a/docs/source/tt_metal/examples/dram_loopback.rst +++ b/docs/source/tt_metal/examples/dram_loopback.rst @@ -50,7 +50,7 @@ We will be using the accelerator core with coordinates ``{0, 0}``. constexpr CoreCoord core = {0, 0}; - KernelID dram_copy_kernel_id = CreateKernel( + KernelHandle dram_copy_kernel_id = CreateKernel( program, "tt_metal/programming_examples/loopback/kernels/loopback_dram_copy.cpp", core, diff --git a/docs/source/tt_metal/examples/eltwise_binary.rst b/docs/source/tt_metal/examples/eltwise_binary.rst index 06d70314aef..abda3b25e99 100644 --- a/docs/source/tt_metal/examples/eltwise_binary.rst +++ b/docs/source/tt_metal/examples/eltwise_binary.rst @@ -26,19 +26,19 @@ We already have set the circular buffers needed for compute data communication. constexpr uint32_t num_input_tiles = 2; constexpr uint32_t input_cb_size = num_input_tiles * single_tile_size; CircularBufferConfig cb_src0_config = CircularBufferConfig(input_cb_size, {{src0_cb_index, tt::DataFormat::Float16_b}}, src0_cb_addr).set_page_size(src0_cb_index, single_tile_size); - CircularBufferID cb_src0 = CreateCircularBuffer(program, core, cb_src0_config); + CBHandle cb_src0 = CreateCircularBuffer(program, core, cb_src0_config); constexpr uint32_t src1_cb_index = CB::c_in1; constexpr uint32_t src1_cb_addr = 300 * 1024; CircularBufferConfig cb_src1_config = CircularBufferConfig(input_cb_size, {{src1_cb_index, tt::DataFormat::Float16_b}}, src1_cb_addr).set_page_size(src1_cb_index, single_tile_size); - CircularBufferID cb_src1 = CreateCircularBuffer(program, core, cb_src1_config); + CBHandle cb_src1 = CreateCircularBuffer(program, core, cb_src1_config); constexpr uint32_t output_cb_index = CB::c_out0; constexpr uint32_t output_cb_addr = 400 * 1024; constexpr uint32_t num_output_tiles = 2; constexpr uint32_t input_cb_size = num_input_tiles * single_tile_size; CircularBufferConfig cb_output_config = CircularBufferConfig(input_cb_size, {{output_cb_index, tt::DataFormat::Float16_b}}, output_cb_addr).set_page_size(output_cb_index, single_tile_size); - CircularBufferID cb_output = CreateCircularBuffer(program, core, cb_output); + CBHandle cb_output = CreateCircularBuffer(program, core, cb_output); We will create two input circular buffers to accommodate our two input tensors, and an output one for the result of the eltwise binary operation. @@ -48,7 +48,7 @@ Compute kernel declaration and compile-time defines .. code-block:: cpp - KernelID eltwise_binary_kernel_id = CreateKernel( + KernelHandle eltwise_binary_kernel_id = CreateKernel( program, "tt_metal/kernels/compute/eltwise_binary.cpp", core, diff --git a/docs/source/tt_metal/examples/eltwise_sfpu.rst b/docs/source/tt_metal/examples/eltwise_sfpu.rst index 9099b9f5e40..db40eb0e63d 100644 --- a/docs/source/tt_metal/examples/eltwise_sfpu.rst +++ b/docs/source/tt_metal/examples/eltwise_sfpu.rst @@ -23,12 +23,12 @@ compute, and writer engines. constexpr uint32_t src0_cb_index = CB::c_in0; constexpr uint32_t num_input_tiles = 2; CircularBufferConfig cb_src0_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src0_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src0_cb_index, single_tile_size); - CircularBufferID cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); + CBHandle cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); constexpr uint32_t output_cb_index = CB::c_out0; constexpr uint32_t num_output_tiles = 2; CircularBufferConfig cb_output_config = CircularBufferConfig(num_output_tiles * single_tile_size, {{output_cb_index, tt::DataFormat::Float16_b}}).set_page_size(output_cb_index, single_tile_size); - CircularBufferID cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config); + CBHandle cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config); We will create one input circular buffers to accommodate our input tensor, and an output one for the result of the eltwise sfpu operation. @@ -59,7 +59,7 @@ Compute kernel declaration and compile-time defines {"SFPU_OP_CHAIN_0", "exp_tile_init(); exp_tile(0);"} }; - KernelID eltwise_sfpu_kernel_id = CreateKernel( + KernelHandle eltwise_sfpu_kernel_id = CreateKernel( program, "tt_metal/kernels/compute/eltwise_sfpu.cpp", core, diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp index 2901feeec4b..3eb8f994a6c 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp @@ -400,8 +400,8 @@ tt_metal::Program create_program_mcast_in0_in1( .noc = tt_metal::NOC::RISCV_0_default, .compile_args = in0_receiver_compile_time_args}); - KernelID mm_kernel_in1_receiver_writer_other_noc_setup_id = 0; - KernelID mm_kernel_in0_receiver_other_noc_setup_id = 0; + KernelHandle mm_kernel_in1_receiver_writer_other_noc_setup_id = 0; + KernelHandle mm_kernel_in0_receiver_other_noc_setup_id = 0; if (split_half) { mm_kernel_in1_receiver_writer_other_noc_setup_id = @@ -586,8 +586,8 @@ tt_metal::Program create_program_mcast_in0_in1( (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) * (per_core_N * out_subblock_h); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; for (int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) { for (int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) { CoreCoord core = {(std::size_t)start_core_x + core_idx_x, diff --git a/tests/tt_metal/tt_metal/test_add_two_ints.cpp b/tests/tt_metal/tt_metal/test_add_two_ints.cpp index 20d652f2e5e..d7857904fe9 100644 --- a/tests/tt_metal/tt_metal/test_add_two_ints.cpp +++ b/tests/tt_metal/tt_metal/test_add_two_ints.cpp @@ -39,7 +39,7 @@ int main(int argc, char **argv) { std::vector first_runtime_args = {101, 202}; std::vector second_runtime_args = {303, 606}; - tt_metal::KernelID add_two_ints_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle add_two_ints_kernel = tt_metal::CreateKernel( program, "tests/tt_metal/tt_metal/test_kernels/misc/add_two_ints.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default}); diff --git a/tests/tt_metal/tt_metal/test_compile_args.cpp b/tests/tt_metal/tt_metal/test_compile_args.cpp index a61ca1982c3..b3bde88a8de 100644 --- a/tests/tt_metal/tt_metal/test_compile_args.cpp +++ b/tests/tt_metal/tt_metal/test_compile_args.cpp @@ -33,14 +33,14 @@ bool test_compile_args(std::vector compile_args_vec, int device_id) { CoreCoord core = {0, 0}; - tt_metal::KernelID unary_reader_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel = tt_metal::CreateKernel( program, "tests/tt_metal/tt_metal/test_kernels/dataflow/test_compile_args.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = compile_args_vec}); - tt_metal::KernelID unary_writer_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel = tt_metal::CreateKernel( program, "tt_metal/kernels/dataflow/blank.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default}); diff --git a/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp b/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp index 459e9f49679..f955069ad10 100644 --- a/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp +++ b/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp @@ -84,7 +84,7 @@ std::vector select_columns(std::vector data, int M, int K, i return result; } -std::tuple create_program( +std::tuple create_program( tt_metal::Device *device, int num_cores_r, int num_cores_c, @@ -171,8 +171,8 @@ bool write_runtime_args_to_device( tt_metal::Program &program, int num_cores_r, int num_cores_c, - tt_metal::KernelID reader_kernel, - tt_metal::KernelID writer_kernel, + tt_metal::KernelHandle reader_kernel, + tt_metal::KernelHandle writer_kernel, int tensor_num_tiles, int block_num_tiles, int Ht, diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp index 3471331f97a..984c24dfbbe 100644 --- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp +++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp @@ -86,7 +86,7 @@ std::vector select_columns(std::vector data, int M, int K, i return result; } -std::tuple create_program( +std::tuple create_program( tt_metal::Device *device, int num_cores_r, int num_cores_c, @@ -193,8 +193,8 @@ bool assign_runtime_args_to_program( tt_metal::Program &program, int num_cores_r, int num_cores_c, - tt_metal::KernelID mm_reader_kernel, - tt_metal::KernelID unary_writer_kernel, + tt_metal::KernelHandle mm_reader_kernel, + tt_metal::KernelHandle unary_writer_kernel, int M, int N, int K, diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp index b93740cb261..ef48a028825 100644 --- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp +++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp @@ -86,7 +86,7 @@ std::vector select_columns(std::vector data, int M, int K, i return result; } -std::tuple create_program( +std::tuple create_program( tt_metal::Device *device, int start_core_x, int start_core_y, @@ -216,9 +216,9 @@ bool write_runtime_args_to_device( int start_core_y, int num_cores_r, int num_cores_c, - tt_metal::KernelID mm_reader_kernel_sender, - tt_metal::KernelID mm_reader_kernel_receiver, - tt_metal::KernelID unary_writer_kernel, + tt_metal::KernelHandle mm_reader_kernel_sender, + tt_metal::KernelHandle mm_reader_kernel_receiver, + tt_metal::KernelHandle unary_writer_kernel, int M, int N, int K, diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp index 74693f349e6..a1015937ebf 100644 --- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp +++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp @@ -85,7 +85,7 @@ std::vector select_columns(std::vector data, int M, int K, i return result; } -std::tuple create_program( +std::tuple create_program( tt_metal::Device *device, int start_core_x, int start_core_y, @@ -250,12 +250,12 @@ bool write_runtime_args_to_device( int start_core_y, int num_cores_r, int num_cores_c, - tt_metal::KernelID mm_reader_kernel_in0_sender_in1_sender, - tt_metal::KernelID mm_reader_kernel_in0_sender_in1_receiver, - tt_metal::KernelID mm_reader_kernel_in0_receiver_in1_sender, - tt_metal::KernelID mm_reader_kernel_in0_receiver_in1_receiver, - tt_metal::KernelID unary_writer_kernel_noc0, - tt_metal::KernelID unary_writer_kernel_noc1, + tt_metal::KernelHandle mm_reader_kernel_in0_sender_in1_sender, + tt_metal::KernelHandle mm_reader_kernel_in0_sender_in1_receiver, + tt_metal::KernelHandle mm_reader_kernel_in0_receiver_in1_sender, + tt_metal::KernelHandle mm_reader_kernel_in0_receiver_in1_receiver, + tt_metal::KernelHandle unary_writer_kernel_noc0, + tt_metal::KernelHandle unary_writer_kernel_noc1, int M, int N, int K, diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp index eb67c7629fd..5bf60e880b2 100644 --- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp +++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp @@ -85,7 +85,7 @@ std::vector select_columns(std::vector data, int M, int K, i return result; } -std::tuple create_program( +std::tuple create_program( tt_metal::Device *device, int start_core_x, int start_core_y, @@ -216,9 +216,9 @@ bool write_runtime_args_to_device( int start_core_y, int num_cores_r, int num_cores_c, - tt_metal::KernelID mm_reader_kernel_sender, - tt_metal::KernelID mm_reader_kernel_receiver, - tt_metal::KernelID unary_writer_kernel, + tt_metal::KernelHandle mm_reader_kernel_sender, + tt_metal::KernelHandle mm_reader_kernel_receiver, + tt_metal::KernelHandle unary_writer_kernel, int M, int N, int K, diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp index 809fe627582..7ea69d406a1 100644 --- a/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp +++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp @@ -152,7 +152,7 @@ std::vector select_columns(std::vector data, int M, int K, i return result; } -std::tuple create_program(tt_metal::Device *device, int num_cores_r, int num_cores_c, int per_core_M, int per_core_N, int K, int in0_block_w, int out_subblock_h, int out_subblock_w) { +std::tuple create_program(tt_metal::Device *device, int num_cores_r, int num_cores_c, int per_core_M, int per_core_N, int K, int in0_block_w, int out_subblock_h, int out_subblock_w) { tt_metal::Program program = tt_metal::CreateProgram(); uint32_t single_tile_size = 2 * 1024; uint32_t in0_block_tiles = per_core_M * in0_block_w; diff --git a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp index a8ef950d59e..4e710ec484c 100644 --- a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp +++ b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp @@ -18,7 +18,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// using namespace tt; -std::tuple create_program( +std::tuple create_program( tt_metal::Device *device, uint32_t single_tile_size, const CoreRange &all_cores, @@ -87,7 +87,7 @@ void compile_and_configure_program( } -void set_rt_args(tt_metal::Program &program, tt_metal::KernelID kernel, const CoreRange &core_range, const std::vector &rt_args) { +void set_rt_args(tt_metal::Program &program, tt_metal::KernelHandle kernel, const CoreRange &core_range, const std::vector &rt_args) { for (auto x = core_range.start.x; x <= core_range.end.x; x++) { for (auto y = core_range.start.y; y <= core_range.end.y; y++) { CoreCoord core = CoreCoord(x, y); @@ -99,8 +99,8 @@ void set_rt_args(tt_metal::Program &program, tt_metal::KernelID kernel, const Co void write_same_runtime_args_to_device( tt_metal::Device *device, tt_metal::Program &program, - tt_metal::KernelID reader_kernel_id, - tt_metal::KernelID writer_kernel_id, + tt_metal::KernelHandle reader_kernel_id, + tt_metal::KernelHandle writer_kernel_id, const CoreRange &core_range, int32_t num_tiles, tt_metal::Buffer &src_dram_buffer, @@ -129,8 +129,8 @@ void write_same_runtime_args_to_device( void write_unique_writer_runtime_args_to_device( tt_metal::Device *device, tt_metal::Program &program, - tt_metal::KernelID reader_kernel_id, - tt_metal::KernelID writer_kernel_id, + tt_metal::KernelHandle reader_kernel_id, + tt_metal::KernelHandle writer_kernel_id, const CoreRange &core_range, const CoreRangeSet &core_blocks, int32_t num_tiles, diff --git a/tests/tt_metal/tt_metal/test_multiple_programs.cpp b/tests/tt_metal/tt_metal/test_multiple_programs.cpp index 0559aee7f00..fade0d13dc9 100644 --- a/tests/tt_metal/tt_metal/test_multiple_programs.cpp +++ b/tests/tt_metal/tt_metal/test_multiple_programs.cpp @@ -35,7 +35,7 @@ std::map get_defines(BinaryOpType::Enum op_type){ } -std::tuple setup_program_one(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) { +std::tuple setup_program_one(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) { tt_metal::Program program = tt_metal::CreateProgram(); uint32_t src0_cb_index = 0; @@ -88,7 +88,7 @@ std::tuple setup_prog return {std::move(program), binary_reader_kernel, unary_writer_kernel}; } -std::tuple setup_program_two(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) { +std::tuple setup_program_two(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) { tt_metal::Program program = tt_metal::CreateProgram(); uint32_t src0_cb_index = 0; @@ -143,8 +143,8 @@ std::tuple setup_prog void write_program_runtime_args_to_device( tt_metal::Device *device, tt_metal::Program &program, - tt_metal::KernelID reader_kernel_id, - tt_metal::KernelID writer_kernel_id, + tt_metal::KernelHandle reader_kernel_id, + tt_metal::KernelHandle writer_kernel_id, const CoreCoord &core, uint32_t num_tiles, tt_metal::Buffer &src0_dram_buffer, diff --git a/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp b/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp index 5e0e4a5ae22..5458fe3a3ab 100644 --- a/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp @@ -315,7 +315,7 @@ TEST_F(DeviceFixture, ValidateKernelDoesNotTargetHarvestedCores) { CoreCoord logical_target_core = CoreCoord({.x = 0, .y = 0}); uint32_t intermediate_l1_addr = L1_UNRESERVED_BASE; uint32_t size_bytes = host_input.size() * sizeof(uint32_t); - tt_metal::KernelID kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle kernel_id = tt_metal::CreateKernel( program, kernel_name, logical_target_core, diff --git a/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp b/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp index 0ba4f3959c7..a54cacb65d2 100644 --- a/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp @@ -180,7 +180,7 @@ TEST_F(DeviceFixture, TestValidUpdateCircularBufferSize) { const uint32_t core0_num_cbs = 2; std::map> golden_addresses_per_core; - std::vector cb_ids; + std::vector cb_ids; auto expected_cb_addr = L1_UNRESERVED_BASE; for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) { CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size); @@ -213,7 +213,7 @@ TEST_F(DeviceFixture, TestInvalidUpdateCircularBufferSize) { const uint32_t core0_num_cbs = 2; std::map> golden_addresses_per_core; - std::vector cb_ids; + std::vector cb_ids; auto expected_cb_addr = L1_UNRESERVED_BASE; for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) { CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size); @@ -246,7 +246,7 @@ TEST_F(DeviceFixture, TestUpdateCircularBufferAddress) { const uint32_t core0_num_cbs = 2; std::map> golden_addresses_per_core; - std::vector cb_ids; + std::vector cb_ids; auto expected_cb_addr = L1_UNRESERVED_BASE; for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) { CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size); @@ -277,7 +277,7 @@ TEST_F(DeviceFixture, TestUpdateCircularBufferPageSize) { const uint32_t core0_num_cbs = 2; std::map> golden_addresses_per_core; std::map> golden_num_pages_per_core; - std::vector cb_ids; + std::vector cb_ids; auto expected_cb_addr = L1_UNRESERVED_BASE; for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) { CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size); diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp index 2fd7feee4df..a597efb0353 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp @@ -117,7 +117,7 @@ bool test_dummy_EnqueueProgram_with_cbs_update_size(Device* device, CommandQueue Program program; - std::vector cb_ids; + std::vector cb_ids; for (uint32_t i = 0; i < program_config.cb_config_vector.size(); i++) { uint32_t cb_id = program_config.cb_config_vector[i].cb_id; uint32_t cb_num_pages = program_config.cb_config_vector[i].num_pages; diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp index 2abdcf2f2f0..3df8ef3245c 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp @@ -35,19 +35,19 @@ TEST_F(CommandQueueWithDPrintFixture, TestPrintFromAllHarts) { // Three different kernels to mirror typical usage and some previously // failing test cases, although all three kernels simply print. constexpr CoreCoord core = {0, 0}; // Print on first core only - KernelID brisc_print_kernel_id = CreateKernel( + KernelHandle brisc_print_kernel_id = CreateKernel( program, "tests/tt_metal/tt_metal/test_kernels/misc/brisc_print.cpp", core, DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default} ); - KernelID ncrisc_print_kernel_id = CreateKernel( + KernelHandle ncrisc_print_kernel_id = CreateKernel( program, "tests/tt_metal/tt_metal/test_kernels/misc/ncrisc_print.cpp", core, DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default} ); - KernelID trisc_print_kernel_id = CreateKernel( + KernelHandle trisc_print_kernel_id = CreateKernel( program, "tests/tt_metal/tt_metal/test_kernels/misc/trisc_print.cpp", core, diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp index fa0a8b8d74a..0af2eabccf8 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp @@ -33,7 +33,7 @@ try{ // Two kernels - one for brisc and one for ncrisc. Nothing for triscs in // this test. - KernelID brisc_kernel_id = CreateKernel( + KernelHandle brisc_kernel_id = CreateKernel( program, "tests/tt_metal/tt_metal/test_kernels/misc/dprint_raise_wait_brisc.cpp", CoreRange{ @@ -45,7 +45,7 @@ try{ .noc = NOC::RISCV_0_default } ); - KernelID ncrisc_kernel_id = CreateKernel( + KernelHandle ncrisc_kernel_id = CreateKernel( program, "tests/tt_metal/tt_metal/test_kernels/misc/dprint_raise_wait_ncrisc.cpp", CoreRange{ diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp index db348d1870f..6dbbf5f0882 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp @@ -97,8 +97,8 @@ void create_and_run_row_pipeline(tt_metal::Device* device, const PipelineRowConf dst_noc_xy = dst_buffer.noc_coordinates(); // create kernels - vector receiver_kernels; - vector sender_kernels; + vector receiver_kernels; + vector sender_kernels; for (int core_id = 0; core_id < num_cores; core_id++) { string receiver_kernel_name; if (core_id == 0) { diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp index e495173e616..3b02137b2ce 100644 --- a/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp @@ -85,13 +85,13 @@ operation::ProgramWithCallbacks bcast_multi_core_h(const Tensor &a, const Tensor bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; std::vector writer_compile_time_args = {(uint32_t)dst_is_dram}; - KernelID binary_reader_kernel_id = tt_metal::CreateKernel( + KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, reader_name, all_device_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp", all_device_cores, diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp index 805bb854898..86dd099f4fa 100644 --- a/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp @@ -95,13 +95,13 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso reader_defines["BCAST_SCALAR"] = "1"; bcast_compute_defines["BCAST_SCALAR"] = "1"; } - KernelID binary_reader_kernel_id = tt_metal::CreateKernel( + KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, reader_name, all_device_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); - KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_device_cores, diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp index 72b60df9be5..8f9663b22dd 100644 --- a/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp @@ -86,13 +86,13 @@ operation::ProgramWithCallbacks bcast_multi_core_w(const Tensor &a, const Tensor bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; std::vector writer_compile_time_args = {(uint32_t)dst_is_dram}; - KernelID binary_reader_kernel_id = tt_metal::CreateKernel( + KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, reader_name, all_device_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp", all_device_cores, diff --git a/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp b/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp index 2abd6e622a5..4be64942e7b 100644 --- a/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp @@ -77,13 +77,13 @@ operation::ProgramWithCallbacks bcast_single_core(const Tensor &a, const Tensor }; const char* reader_name = bcast_op_utils::get_reader_name(bcast_dim, BcastOpParallelizationStrategy::SINGLE_CORE); - KernelID binary_reader_kernel_id = tt_metal::CreateKernel( + KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, reader_name, core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp index 542ed7d31a3..07c8a5b0928 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp @@ -260,7 +260,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0( tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines}); - KernelID mm_kernel_in0_receiver_id = 0; + KernelHandle mm_kernel_in0_receiver_id = 0; if (!in0_is_sharded) { mm_kernel_in0_receiver_id = tt_metal::CreateKernel( program, @@ -320,7 +320,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0( auto cb_src1 = tt_metal::CreateCircularBuffer(program, all_cores, src1_cb_config); uint32_t src2_cb_index = 2; - CircularBufferID cb_src2 = 0; + CBHandle cb_src2 = 0; if (in0_is_sharded) { tt_metal::CircularBufferConfig src2_cb_config = tt_metal::CircularBufferConfig(in2_CB_size, {{src2_cb_index, in0_data_format}}) .set_page_size(src2_cb_index, in0_single_tile_size).set_globally_allocated_address(*in0_buffer); @@ -359,8 +359,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0( uint32_t last_block_padded_block_tiles_w_skip = (out_subblock_w * out_subblock_h) * (per_core_N / out_subblock_w - last_block_num_nonzero_subblocks_w); uint32_t last_block_padded_block_tiles_h_skip = (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) * (per_core_N * out_subblock_h); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; std::vector in0_mcast_noc_x; std::vector in0_mcast_noc_y; @@ -873,8 +873,8 @@ operation::ProgramWithCallbacks create_program_mcast_in1( uint32_t last_block_padded_block_tiles_w_skip = (out_subblock_w * out_subblock_h) * (per_core_N / out_subblock_w - last_block_num_nonzero_subblocks_w); uint32_t last_block_padded_block_tiles_h_skip = (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) * (per_core_N * out_subblock_h); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; for(uint32_t i = 0; i < num_cores; i++) { uint32_t core_idx_x = i % num_cores_c; uint32_t core_idx_y = i / num_cores_c; diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp index 039981c7de0..d30c76d6869 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp @@ -349,7 +349,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( (CoreRangeSet) (std::set) {in0_sender_in1_receiver, in0_receiver_in1_receiver_left_half}, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = in1_noc, .compile_args = in1_receiver_writer_compile_time_args, .defines = mm_kernel_in1_receiver_writer_defines}); - KernelID mm_kernel_in0_receiver_id = 0; + KernelHandle mm_kernel_in0_receiver_id = 0; if (!in0_is_sharded) { mm_kernel_in0_receiver_id = tt_metal::CreateKernel( program, @@ -359,8 +359,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = in0_noc, .compile_args = in0_receiver_compile_time_args}); } - KernelID mm_kernel_in1_receiver_writer_other_noc_setup_id = mm_kernel_in1_receiver_writer_id; - KernelID mm_kernel_in0_receiver_other_noc_setup_id = mm_kernel_in0_receiver_id; + KernelHandle mm_kernel_in1_receiver_writer_other_noc_setup_id = mm_kernel_in1_receiver_writer_id; + KernelHandle mm_kernel_in0_receiver_other_noc_setup_id = mm_kernel_in0_receiver_id; if (split_half) { mm_kernel_in1_receiver_writer_other_noc_setup_id = tt_metal::CreateKernel( @@ -427,7 +427,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( auto cb_src1 = tt_metal::CreateCircularBuffer(program, all_cores, src1_cb_config); uint32_t src2_cb_index = 2; - CircularBufferID cb_src2 = 0; + CBHandle cb_src2 = 0; if (in0_is_sharded) { tt_metal::CircularBufferConfig src2_cb_config = tt_metal::CircularBufferConfig(in2_CB_size, {{src2_cb_index, in0_data_format}}) .set_page_size(src2_cb_index, in0_single_tile_size).set_globally_allocated_address(*in0_buffer); @@ -467,8 +467,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( uint32_t last_block_padded_block_tiles_w_skip = (out_subblock_w * out_subblock_h) * (per_core_N / out_subblock_w - last_block_num_nonzero_subblocks_w); uint32_t last_block_padded_block_tiles_h_skip = (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) * (per_core_N * out_subblock_h); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; uint32_t diff_start_coord; uint32_t diff_end_coord; diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp index 7a302a50c61..d9788814e2c 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp @@ -187,8 +187,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1( .set_page_size(interm0_cb_index, single_tile_size); auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) { for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) { CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y}; @@ -473,8 +473,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0( .set_page_size(interm0_cb_index, single_tile_size); auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) { for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) { CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y}; @@ -733,8 +733,8 @@ operation::ProgramWithCallbacks create_program_mcast_in1( .set_page_size(interm0_cb_index, single_tile_size); auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) { for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) { CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y}; diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp index 21e082a683c..df23054ec94 100644 --- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp +++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp @@ -205,8 +205,8 @@ operation::ProgramWithCallbacks create_program( .set_page_size(interm0_cb_index, output_single_tile_size); auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config); - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; for (uint32_t i = 0, num_blocks_written = 0; i < num_cores; i++){ uint32_t core_idx_x = i / core_range.y; uint32_t core_idx_y = i % core_range.y; diff --git a/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp index 1659ab88e84..3c705e24448 100644 --- a/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp @@ -95,13 +95,13 @@ operation::ProgramWithCallbacks concat_multi_core(const std::vector &inp }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_concat_interleaved_start_id.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp b/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp index 8f39b7072af..5ec76ed4d76 100644 --- a/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp @@ -95,13 +95,13 @@ operation::ProgramWithCallbacks concat_single_core(const std::vector &in }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_concat_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp index a1938b88d59..4f3c78e8378 100644 --- a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp +++ b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp @@ -37,7 +37,7 @@ const uint32_t untilize_mode_reblock_cb = CB::c_intermed2; const uint32_t out0_cb = CB::c_out0; -tuple create_CBs(tt_metal::Program &program, +tuple create_CBs(tt_metal::Program &program, const Tensor& input, CoreRange core, uint32_t num_cb0_tiles, @@ -68,8 +68,8 @@ tuple create_CBs(tt_metal::Program &program, .set_page_size(act_cb, act_tile_size); auto cb_act = tt_metal::CreateCircularBuffer(program, core, cb_act_config); - CircularBufferID cb_sharded_act = 0; - CircularBufferID cb_sharded_act_mcast_receiver = 0; + CBHandle cb_sharded_act = 0; + CBHandle cb_sharded_act_mcast_receiver = 0; if (input.is_sharded()) { uint32_t num_bytes_for_df = datum_size(act_df); auto shard_shape = input.shard_spec().value().shard_shape; @@ -96,7 +96,7 @@ tuple create_CBs(tt_metal::Program &program, .set_page_size(tilize_mode_tilized_act_cb, tilized_act_tile_size); auto cb_src0_tilized = tt_metal::CreateCircularBuffer(program, core, cb_src0_tilized_config); - CircularBufferID cb_output = 0; + CBHandle cb_output = 0; if (untilize_out) { CircularBufferConfig cb_matmul_partials_config = CircularBufferConfig(num_output_tiles * out_tile_size, {{matmul_partials_cb, out_df}}) .set_page_size(matmul_partials_cb, out_tile_size); @@ -738,7 +738,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_(const Tensor& a, cons .compile_args = writer_compile_time_args, .defines = writer_mcast_sender_defines}); - KernelID writer_mcast_receiver_id; + KernelHandle writer_mcast_receiver_id; if (total_num_cores > 1) { writer_mcast_receiver_id = CreateKernel( program, @@ -779,8 +779,8 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_(const Tensor& a, cons noop_cores, ComputeConfig{}); } - vector reader_ids; - vector writer_ids; + vector reader_ids; + vector writer_ids; //tt_start_debug_print_server(); for(uint32_t core_i = 0; core_i < total_num_cores; core_i++) { uint32_t core_x_i = core_i % num_cores_x; diff --git a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp index 4dbb23eaa3a..253acdf78d2 100644 --- a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp +++ b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp @@ -37,7 +37,7 @@ const uint32_t untilize_mode_reblock_cb = CB::c_intermed2; const uint32_t out0_cb = CB::c_out0; -tuple create_CBs_for_sharded_input( +tuple create_CBs_for_sharded_input( tt_metal::Program &program, const Tensor& input, CoreRange core, @@ -64,7 +64,7 @@ tuple create_CBs_for_sharded_input( uint32_t tilized_act_tile_size = tt_metal::detail::TileSize(tilized_act_df); uint32_t out_tile_size = tt_metal::detail::TileSize(out_df); - CircularBufferID cb_sharded_act = 0; + CBHandle cb_sharded_act = 0; if (input.memory_config().is_sharded()) { uint32_t num_bytes_for_df = datum_size(act_df); auto shard_shape = input.shard_spec().value().shard_shape; @@ -111,7 +111,7 @@ tuple create_CBs_for_sharded_input( .set_page_size(tilize_mode_tilized_act_cb, tilized_act_tile_size); auto cb_src0_tilized = tt_metal::CreateCircularBuffer(program, core, cb_src0_tilized_config); - CircularBufferID cb_output = 0; + CBHandle cb_output = 0; if (untilize_out) { CircularBufferConfig cb_matmul_partials_config = CircularBufferConfig(num_output_tiles * out_tile_size, {{matmul_partials_cb, out_df}}) .set_page_size(matmul_partials_cb, out_tile_size); @@ -782,7 +782,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_(const Tensor& .compile_args = writer_compile_time_args, .defines = writer_mcast_sender_defines}); - KernelID writer_mcast_receiver_id; + KernelHandle writer_mcast_receiver_id; if (total_num_cores > 1) { writer_mcast_receiver_id = CreateKernel( program, @@ -823,8 +823,8 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_(const Tensor& noop_cores, ComputeConfig{}); } - vector reader_ids; - vector writer_ids; + vector reader_ids; + vector writer_ids; //tt_start_debug_print_server(); for(uint32_t core_i = 0; core_i < total_num_cores; core_i++) { uint32_t core_x_i = core_i % num_cores_x; diff --git a/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp index a82ccfe33dc..cb1c4924d94 100644 --- a/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp @@ -87,13 +87,13 @@ operation::ProgramWithCallbacks copy_multi_core(const Tensor &input, const Tenso if (backwards) { kernel_defines["BACKWARDS"] = "1"; } - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_interleaved_start_id.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = kernel_defines}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp b/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp index fa283f44d10..3a1d0115438 100644 --- a/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp @@ -84,13 +84,13 @@ operation::ProgramWithCallbacks copy_single_core(const Tensor &input, const Tens if (backwards) { kernel_defines["BACKWARDS"] = "1"; } - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = kernel_defines}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, tilized ? "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp b/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp index d62e9e7a96a..cc0427e0e9d 100644 --- a/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp +++ b/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp @@ -515,7 +515,7 @@ operation::ProgramWithCallbacks downsample_single_core(const Tensor &a, std::arr }; // Writer to downsample - drops rows from untilized cb - tt_metal::KernelID downsample_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle downsample_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/downsample/kernels/downsample_writer_kernel.cpp", core_range, diff --git a/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp index 52af678e2d2..7311174c8b5 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp @@ -132,13 +132,13 @@ operation::ProgramWithCallbacks eltwise_binary_multi_core(const Tensor &a, const (std::uint32_t) dst_is_dram }; - KernelID binary_reader_kernel_id = tt_metal::CreateKernel( + KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_binary_interleaved_start_id.cpp", all_device_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); - KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, (block_sharded and not out_sharded) ? "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded_blocks_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_device_cores, diff --git a/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp index 420b5e144e2..d0dd885d0cb 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp @@ -79,13 +79,13 @@ operation::ProgramWithCallbacks eltwise_binary_single_core(const Tensor &a, cons (std::uint32_t) dst_is_dram }; - KernelID binary_reader_kernel_id = tt_metal::CreateKernel( + KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_binary_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp index d78b43dd2df..de41302a09a 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp @@ -56,13 +56,13 @@ operation::ProgramWithCallbacks eltwise_unary_multi_core(const Tensor &a, Tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp index 2d95bbc5548..c060a9d9e72 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp @@ -50,13 +50,13 @@ operation::ProgramWithCallbacks eltwise_unary_single_core(const Tensor &a, Tenso (std::uint32_t) dst_is_dram }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp b/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp index 383e3289dc7..51dc25f9879 100644 --- a/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp +++ b/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp @@ -388,7 +388,7 @@ operation::ProgramWithCallbacks embeddings_rm( std::vector noc_ports = {tt_metal::NOC::RISCV_0_default, tt_metal::NOC::RISCV_1_default}; - std::vector kernIds(RISC_CORES_PER_TENSIX); + std::vector kernIds(RISC_CORES_PER_TENSIX); for(int risc_id =0; risc_id < embedding_risc_cores_per_tensix; risc_id++){ std::vector embedding_compile_time_args= { (std::uint32_t) in0_is_dram, diff --git a/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp b/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp index c7f4abf9cc9..8f503844582 100644 --- a/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp +++ b/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp @@ -42,7 +42,7 @@ operation::ProgramWithCallbacks fill_rm_single_core(const Tensor& any, Tensor &o bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; std::vector reader_compile_time_args = {(std::uint32_t) dst_is_dram}; - tt_metal::KernelID binary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/fill_rm_interleaved.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args=reader_compile_time_args}); diff --git a/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp b/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp index f0169ca3912..962d81ca1f0 100644 --- a/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp @@ -83,7 +83,7 @@ std::tuple &core_spec, @@ -100,7 +100,7 @@ KernelID CreateReadKernel( .defines = defines}); } -KernelID CreateWriteKernel( +KernelHandle CreateWriteKernel( Program &program, const std::string &file_name, const std::variant &core_spec, @@ -117,7 +117,7 @@ KernelID CreateWriteKernel( .defines = defines}); } -[[maybe_unused]] std::vector CreateComputeKernel( +[[maybe_unused]] std::vector CreateComputeKernel( Program &program, const std::string &file_name, std::vector args, @@ -125,8 +125,8 @@ KernelID CreateWriteKernel( MathFidelity math_fidelity, bool fp32_dest_acc_en, bool math_approx_mode) { - std::vector compute_kernel_ids{}; - KernelID compute_kernel_id{}; + std::vector compute_kernel_ids{}; + KernelHandle compute_kernel_id{}; for (auto arg : args) { compute_kernel_id = CreateComputeKernel(program, file_name, arg, defines, math_fidelity, fp32_dest_acc_en, math_approx_mode); @@ -135,7 +135,7 @@ KernelID CreateWriteKernel( return compute_kernel_ids; } -[[maybe_unused]] KernelID CreateComputeKernel( +[[maybe_unused]] KernelHandle CreateComputeKernel( Program &program, const std::string &file_name, ComputeKernelArg arg, @@ -143,7 +143,7 @@ KernelID CreateWriteKernel( MathFidelity math_fidelity, bool fp32_dest_acc_en, bool math_approx_mode) { - KernelID compute_kernel_id{0}; + KernelHandle compute_kernel_id{0}; if (arg.num_tile_per_core_group > 0) { compute_kernel_id = CreateKernel( program, @@ -159,10 +159,10 @@ KernelID CreateWriteKernel( return compute_kernel_id; } -[[maybe_unused]] std::vector CreateCircularBuffer( +[[maybe_unused]] std::vector CreateCircularBuffer( Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, std::vector args) { - std::vector cb_ids{}; - CircularBufferID cb_id{}; + std::vector cb_ids{}; + CBHandle cb_id{}; for (auto arg : args) { cb_id = CreateCircularBuffer(program, core_range, data_format, arg); cb_ids.push_back(cb_id); @@ -170,9 +170,9 @@ KernelID CreateWriteKernel( return cb_ids; } -[[maybe_unused]] CircularBufferID CreateCircularBuffer( +[[maybe_unused]] CBHandle CreateCircularBuffer( Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, CircularBufferArg arg) { - CircularBufferID cb_id{0}; + CBHandle cb_id{0}; if (arg.num_tiles > 0) { auto _buffer_index = arg.buffer_index; auto _num_tiles = arg.num_tiles; diff --git a/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp b/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp index 1526210e956..262f784c7f0 100644 --- a/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp +++ b/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp @@ -55,14 +55,14 @@ std::tuple add_core_offset( std::tuple split_work_to_cores( CoreRange core_range, uint32_t units_to_divide); -KernelID CreateReadKernel( +KernelHandle CreateReadKernel( Program &program, const std::string &file_name, const std::variant &core_spec, const std::vector &compile_args, std::map defines = {}); -KernelID CreateWriteKernel( +KernelHandle CreateWriteKernel( Program &program, const std::string &file_name, const std::variant &core_spec, @@ -75,7 +75,7 @@ struct ComputeKernelArg { const std::vector &compile_args; }; -[[maybe_unused]] std::vector CreateComputeKernel( +[[maybe_unused]] std::vector CreateComputeKernel( Program &program, const std::string &file_name, std::vector args, @@ -84,7 +84,7 @@ struct ComputeKernelArg { bool fp32_dest_acc_en = false, bool math_approx_mode = false); -[[maybe_unused]] KernelID CreateComputeKernel( +[[maybe_unused]] KernelHandle CreateComputeKernel( Program &program, const std::string &file_name, ComputeKernelArg arg, @@ -108,10 +108,10 @@ struct CircularBufferArg { } }; -[[maybe_unused]] std::vector CreateCircularBuffer( +[[maybe_unused]] std::vector CreateCircularBuffer( Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, std::vector args); -[[maybe_unused]] CircularBufferID CreateCircularBuffer( +[[maybe_unused]] CBHandle CreateCircularBuffer( Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, CircularBufferArg arg); } // namespace primary diff --git a/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp index d677673210e..0bf1d1eae73 100644 --- a/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp @@ -156,7 +156,7 @@ operation::ProgramWithCallbacks moreh_matmul_multi_core( const auto compute_kernel_1_id = CreateComputeKernel( program, compute_kernel_file, {core_group_1, num_output_tiles_per_core_group_1, compute_args_group_1}, compute_defines); - std::optional compute_kernel_2_id = std::nullopt; + std::optional compute_kernel_2_id = std::nullopt; if (!core_group_2.ranges().empty()) { const std::vector compute_args_group_2 = { 1, // B diff --git a/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp b/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp index bfac14700aa..80d00563fbd 100644 --- a/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp @@ -108,7 +108,7 @@ operation::ProgramWithCallbacks moreh_sum_multi_core(const Tensor &src, const Te const auto compute_kernel_1_id = CreateComputeKernel( program, compute_kernel_file, {core_group_1, num_cols_per_core_group_1, compute_args_group_1}, compute_defines); - std::optional compute_kernel_2_id = std::nullopt; + std::optional compute_kernel_2_id = std::nullopt; if (!core_group_2.ranges().empty()) { const std::vector compute_args_group_2{num_cols_per_core_group_2}; compute_kernel_2_id = CreateComputeKernel( diff --git a/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp b/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp index 53ef6f77555..78fc55824f3 100644 --- a/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp +++ b/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp @@ -100,7 +100,7 @@ operation::ProgramWithCallbacks move_multi_core_with_overlap(const Tensor &input compile_time_args.push_back((uint32_t)page_size_is_power_of_two); } - KernelID kernel_id = CreateKernel( + KernelHandle kernel_id = CreateKernel( program, tilized ? "tt_eager/tt_dnn/op_library/move/kernels/move_interleaved_with_overlap.cpp" : "tt_eager/tt_dnn/op_library/move/kernels/move_stick_layout_interleaved_with_overlap.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/pad/pad_op.cpp b/tt_eager/tt_dnn/op_library/pad/pad_op.cpp index ab946f79597..7dcaab8d40f 100644 --- a/tt_eager/tt_dnn/op_library/pad/pad_op.cpp +++ b/tt_eager/tt_dnn/op_library/pad/pad_op.cpp @@ -72,13 +72,13 @@ operation::ProgramWithCallbacks pad_rm_reader_writer(const Tensor &a, bfloat16 bfloat_zero = bfloat16(0.0f); uint32_t packed_pad_value = pack_two_bfloat16_into_uint32({bfloat_zero, bfloat_pad_value}); - KernelID reader_kernel_id = CreateKernel(program, + KernelHandle reader_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/reader_pad_dims_rm_interleaved.cpp", cores, DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_ct_args}); - KernelID writer_kernel_id = CreateKernel(program, + KernelHandle writer_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/writer_pad_dims_rm_interleaved.cpp", cores, DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, @@ -218,7 +218,7 @@ operation::ProgramWithCallbacks pad_rm_opt(const Tensor &a, uint32_t packed_pad_value = pack_two_bfloat16_into_uint32({bfloat_zero, bfloat_pad_value}); CoreRange core = {.start={0, 0}, .end={0, 0}}; - KernelID reader_kernel_id = CreateKernel(program, + KernelHandle reader_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/pad_dims_rm_interleaved_opt.cpp", core, DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, @@ -349,7 +349,7 @@ operation::ProgramWithCallbacks pad_rm(const Tensor &a, Tensor &output, const Sh }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/pad_dims_rm_interleaved.cpp", core, @@ -464,13 +464,13 @@ operation::ProgramWithCallbacks pad_tile(const Tensor &a, Tensor& output, const (std::uint32_t) dst_is_dram }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_pad_dims_interleaved.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp index 315b237b0cb..600f2b207a7 100644 --- a/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp @@ -191,13 +191,13 @@ operation::ProgramWithCallbacks pad_rm_reader_writer_multi_core(const Tensor &a, bfloat16 bfloat_zero = bfloat16(0.0f); uint32_t packed_pad_value = pack_two_bfloat16_into_uint32({bfloat_zero, bfloat_pad_value}); - KernelID reader_kernel_id = CreateKernel(program, + KernelHandle reader_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/reader_pad_dims_rm_interleaved.cpp", all_cores, DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_ct_args}); - KernelID writer_kernel_id = CreateKernel(program, + KernelHandle writer_kernel_id = CreateKernel(program, "tt_eager/tt_dnn/kernels/dataflow/writer_pad_dims_rm_interleaved.cpp", all_cores, DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, diff --git a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp index 7320ae93a9d..92bd844d7e0 100644 --- a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp @@ -280,7 +280,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(const Tensor &inp .set_page_size(in_scalar_cb_id, in_scalar_cb_pagesize); auto in_scalar_cb = tt_metal::CreateCircularBuffer(program, all_cores, in_scalar_cb_config); - CircularBufferID raw_in_cb = 0; + CBHandle raw_in_cb = 0; if (input.memory_config().is_sharded()) { // incoming data is the input cb instead of raw l1/dram addr auto raw_in_cb_id = CB::c_in2; @@ -319,7 +319,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(const Tensor &inp .set_page_size(out_cb_id, out_cb_pagesize); auto cb_out = tt_metal::CreateCircularBuffer(program, all_cores, cb_out_config); - CircularBufferID cb_sharded_out = 0; + CBHandle cb_sharded_out = 0; if (output.memory_config().is_sharded()) { uint32_t sharded_out_cb_id = CB::c_out1; // output rows in RM @@ -1121,7 +1121,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T .set_page_size(out_cb_id, out_cb_pagesize); auto cb_out = tt_metal::CreateCircularBuffer(program, all_cores, cb_out_config); - CircularBufferID cb_sharded_out = 0; + CBHandle cb_sharded_out = 0; if (output.memory_config().is_sharded()) { uint32_t sharded_out_cb_id = CB::c_out1; // output rows in RM diff --git a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp index d934a62d2e2..832ac54de65 100644 --- a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp +++ b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp @@ -319,7 +319,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T .set_page_size(out_cb_id, out_cb_pagesize); auto cb_out = tt_metal::CreateCircularBuffer(program, all_cores, cb_out_config); - CircularBufferID cb_sharded_out = 0; + CBHandle cb_sharded_out = 0; if (output.memory_config().is_sharded()) { uint32_t sharded_out_cb_id = CB::c_out1; // output rows in RM diff --git a/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp b/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp index a68bf29b182..5f75cbbad05 100644 --- a/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp +++ b/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp @@ -60,9 +60,9 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out string compute_kernel_name = reduce_op_utils::dim_to_kernel_name(reduce_dim, reduce_op); uint32_t src0_cb_index = CB::c_in0; - CircularBufferID cb_src0; + CBHandle cb_src0; uint32_t src1_cb_index = CB::c_in1; - CircularBufferID cb_src1 = 0; + CBHandle cb_src1 = 0; if (in_sharded) { uint32_t num_shard_tiles = a.shard_spec().value().numel() / TILE_HW; uint32_t num_input_tiles = 2; @@ -86,7 +86,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out auto cb_scaler = tt_metal::CreateCircularBuffer(program, all_cores, cb_scaler_config); uint32_t output_cb_index = CB::c_out0; // output operands start at index 16 - CircularBufferID cb_output; + CBHandle cb_output; if (out_sharded) { uint32_t num_output_tiles = output.shard_spec().value().numel() / TILE_HW; tt_metal::CircularBufferConfig cb_output_config = tt_metal::CircularBufferConfig(num_output_tiles * dst_single_tile_size, {{output_cb_index, dst_cb_data_format}}) @@ -99,7 +99,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out cb_output = tt_metal::CreateCircularBuffer(program, all_cores, cb_output_config); } tt_metal::Buffer *src0_buffer = a.buffer(); - tt_metal::KernelID reader_kernel_id; + tt_metal::KernelHandle reader_kernel_id; bfloat16 bfloat_scaler_value = bfloat16(scaler); uint32_t packed_scaler_value = pack_two_bfloat16_into_uint32({bfloat_scaler_value, bfloat_scaler_value}); if (in_sharded) { @@ -135,7 +135,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out } tt_metal::Buffer *dst_buffer = output.buffer(); - tt_metal::KernelID writer_kernel_id; + tt_metal::KernelHandle writer_kernel_id; if (out_sharded) { vector writer_ct_args = { diff --git a/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp b/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp index fb8b99d9f23..9b64a03bca9 100644 --- a/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp +++ b/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp @@ -77,13 +77,13 @@ operation::ProgramWithCallbacks reduce_multi_core_w(const Tensor &a, Tensor& out (std::uint32_t) dst_is_dram }; - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/reduce/kernels/dataflow/reader_unary_reduce_interleaved_start_id.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp b/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp index 3a65b6984e4..3db93bb6f95 100644 --- a/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp @@ -84,7 +84,7 @@ operation::ProgramWithCallbacks reduce_single_core(const Tensor &a, Tensor& outp if (reduce_dim == ReduceOpDim::H) { reader_defines["REDUCE_SCALER"] = "1"; } - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, reduce_dim == ReduceOpDim::H ? "tt_eager/tt_dnn/kernels/dataflow/reader_unary_transpose_wh_interleaved.cpp" : @@ -92,7 +92,7 @@ operation::ProgramWithCallbacks reduce_single_core(const Tensor &a, Tensor& outp core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp b/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp index 2fb4ff882e8..0b643e1212d 100644 --- a/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp +++ b/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp @@ -57,13 +57,13 @@ operation::ProgramWithCallbacks reshape_tile_single_core(const Tensor &a, Tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reshape_interleaved.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, @@ -200,13 +200,13 @@ operation::ProgramWithCallbacks reshape_rm_single_core(const Tensor &a, Tensor& writer_compile_time_args.push_back(0); } - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_reshape_stick_layout_interleaved.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_reshape_stick_layout_interleaved.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp index f4dcabcf973..4fbf12a2709 100644 --- a/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp @@ -177,13 +177,13 @@ operation::ProgramWithCallbacks rotary_embedding_multi_core(const Tensor &input, }); } - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_rotary_embedding_interleaved_start_id.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines=kernel_defines}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_rotary_embedding_interleaved_start_id.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp b/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp index 779d7ddad72..3616d69a4ba 100644 --- a/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp @@ -178,13 +178,13 @@ operation::ProgramWithCallbacks rotary_embedding_single_core(const Tensor &input }); } - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_rotary_embedding_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines=kernel_defines}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_rotary_embedding_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp b/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp index 8f826572857..97c39853d1a 100644 --- a/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp @@ -78,13 +78,13 @@ operation::ProgramWithCallbacks rotate_half_single_core(const Tensor &input, Ten (std::uint32_t) dst_is_dram }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_rotate_half_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_rotate_half_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp index 44c9006d5f6..9de93186cc9 100644 --- a/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp @@ -68,7 +68,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(const Tensor & bool src_is_dram = src_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; - tt_metal::KernelID unary_reader_kernel_id; + tt_metal::KernelHandle unary_reader_kernel_id; if (input.layout() == Layout::TILE) { std::vector reader_compile_time_args = { @@ -99,7 +99,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(const Tensor & } std::vector writer_compile_time_args = {out_cb_index}; - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp", all_cores, @@ -262,7 +262,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(const Tensor & (std::uint32_t) src0_cb_index }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, @@ -271,7 +271,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(const Tensor & bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0; - tt_metal::KernelID unary_writer_kernel_id; + tt_metal::KernelHandle unary_writer_kernel_id; if (input.layout() == Layout::TILE) { std::vector writer_compile_time_args = { (std::uint32_t) src0_cb_index, diff --git a/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp b/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp index a104c124595..384149e4d56 100644 --- a/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp +++ b/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp @@ -152,9 +152,9 @@ operation::ProgramWithCallbacks scale_mask_softmax_(const Tensor &input_tensor, auto cb_in2_id = CreateCircularBuffer( program, all_device_cores, c_in2_config ); auto c_intermed0_config = CircularBufferConfig(im0_t * in0_tile_size, {{CB::c_intermed0, in0_cb_data_format}}).set_page_size(CB::c_intermed0, in0_tile_size); auto cb_intermed0_id = CreateCircularBuffer( program, all_device_cores, c_intermed0_config ); - std::optional cb_intermed3_id; - std::optional cb_in3_id; - std::optional cb_in4_id; + std::optional cb_intermed3_id; + std::optional cb_in3_id; + std::optional cb_in4_id; if (mask.has_value()) { CircularBufferConfig c_intermed3_config = CircularBufferConfig(im3_t * in0_tile_size, {{CB::c_intermed3, in0_cb_data_format}}).set_page_size(CB::c_intermed3, in0_tile_size); cb_intermed3_id = CreateCircularBuffer( program, all_device_cores, c_intermed3_config ); diff --git a/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp b/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp index 3e4d123709c..8e64ce8db93 100644 --- a/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp +++ b/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp @@ -34,8 +34,8 @@ void setup_runtime( tt_metal::Buffer *in0_buffer, tt_metal::Buffer *out0_buffer, tt_metal::Buffer *out1_buffer, - tt_metal::KernelID reader_kernel_id, - tt_metal::KernelID writer_kernel_id) { + tt_metal::KernelHandle reader_kernel_id, + tt_metal::KernelHandle writer_kernel_id) { uint32_t start_core_x = 0; uint32_t start_core_y = 0; diff --git a/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp index 764fc2c93af..21eb0eea5b8 100644 --- a/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp @@ -197,7 +197,7 @@ operation::ProgramWithCallbacks tilize_multi_core_interleaved(const Tensor &a, T (std::uint32_t) stick_size_is_power_of_two, (std::uint32_t) log2_stick_size, }; - KernelID unary_reader_kernel_id = CreateKernel( + KernelHandle unary_reader_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_split_rows_interleaved.cpp", all_cores, @@ -213,7 +213,7 @@ operation::ProgramWithCallbacks tilize_multi_core_interleaved(const Tensor &a, T (std::uint32_t) output_cb_index, (std::uint32_t) out_is_dram }; - KernelID unary_writer_kernel_id = CreateKernel( + KernelHandle unary_writer_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, @@ -415,13 +415,13 @@ operation::ProgramWithCallbacks tilize_multi_core_sharded(const Tensor &input, T (std::uint32_t) output_cb_index }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp", all_cores, @@ -558,7 +558,7 @@ operation::ProgramWithCallbacks tilize_with_val_padding_multi_core(const Tensor /** reader */ - KernelID unary_reader_kernel_id; + KernelHandle unary_reader_kernel_id; std::vector reader_ct_args = { (std::uint32_t) src0_cb_index, (std::uint32_t) src1_cb_index, @@ -573,7 +573,7 @@ operation::ProgramWithCallbacks tilize_with_val_padding_multi_core(const Tensor /** writer */ - KernelID unary_writer_kernel_id; + KernelHandle unary_writer_kernel_id; bool out_is_dram = dst_buffer->buffer_type() == BufferType::DRAM ? 1 : 0; vector writer_ct_args = { output_cb_index, diff --git a/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp b/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp index 761f0190d09..24e8c854901 100644 --- a/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp @@ -111,14 +111,14 @@ operation::ProgramWithCallbacks tilize_single_core(const Tensor &a, Tensor& outp }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_split_rows_interleaved.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); // Tilized writer - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, @@ -314,14 +314,14 @@ operation::ProgramWithCallbacks tilize_with_val_padding_single_core(const Tensor }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_pad_dims_split_rows.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); // Tilized writer - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp index fb40179f623..62116c806e5 100644 --- a/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp @@ -136,13 +136,13 @@ operation::ProgramWithCallbacks transpose_cn_single_core(const Tensor &a, Tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_cn_interleaved_start_id.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp index 0d0db7ae3b6..bebe91515e5 100644 --- a/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp @@ -147,13 +147,13 @@ operation::ProgramWithCallbacks transpose_hc_multi_core(const Tensor &a, Tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp", total_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", total_cores, diff --git a/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp b/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp index 822f7c9d117..122cc4d25c9 100644 --- a/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp @@ -84,13 +84,13 @@ operation::ProgramWithCallbacks transpose_wh_single_core(const Tensor &a, Tensor }; //TODO: move this kernel, currently being used in reduce, can't move to op library - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_transpose_wh_interleaved.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, @@ -235,13 +235,13 @@ operation::ProgramWithCallbacks transpose_hc_single_core(const Tensor &a, Tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, @@ -350,13 +350,13 @@ operation::ProgramWithCallbacks transpose_cn_single_core(const Tensor &a, Tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_cn_interleaved.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp index 5cd83065a3a..ab963afef14 100644 --- a/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp @@ -144,13 +144,13 @@ operation::ProgramWithCallbacks transpose_wh_multi_core(const Tensor &a, Tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_wh_interleaved_start_id.cpp", total_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", total_cores, diff --git a/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp index 5d09d590976..5a881cb91a8 100644 --- a/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp @@ -171,13 +171,13 @@ operation::ProgramWithCallbacks unpad_rm_multi_core(const Tensor &a, Tensor& out (std::uint32_t) dst_is_dram }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_rm_interleaved_start_id.cpp", total_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args_vec}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", total_cores, @@ -393,13 +393,13 @@ operation::ProgramWithCallbacks unpad_tile_multi_core(const Tensor &a, Tensor& o }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_interleaved_start_id.cpp", total_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", total_cores, diff --git a/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp b/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp index 8969ca374d9..d7c9ed31e8a 100644 --- a/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp @@ -126,13 +126,13 @@ operation::ProgramWithCallbacks unpad_rm_single_core(const Tensor &a, Tensor& ou }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_rm_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args_vec}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp", core, @@ -281,13 +281,13 @@ operation::ProgramWithCallbacks unpad_tile_single_core(const Tensor &a, Tensor& }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp index 9a222160d1b..696c97691f8 100644 --- a/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp @@ -233,7 +233,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out /** reader */ - KernelID unary_reader_kernel_id; + KernelHandle unary_reader_kernel_id; if (src_sharded) { std::vector reader_ct_args = { @@ -263,7 +263,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out /** writer */ - KernelID unary_writer_kernel_id; + KernelHandle unary_writer_kernel_id; if (out_sharded) { std::vector writer_ct_args = { (std::uint32_t) output_cb_index @@ -650,7 +650,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor .set_page_size(output_cb_index, output_single_tile_size); auto cb_output = tt_metal::CreateCircularBuffer(program, all_cores, output_cb_config); - CircularBufferID cb_sharded_output = 0; + CBHandle cb_sharded_output = 0; uint32_t sharded_output_cb_index = CB::c_out1; if (out_sharded) { tt_metal::CircularBufferConfig sharded_output_cb_config = tt_metal::CircularBufferConfig(num_output_rows_unpadded * block_row_size, {{sharded_output_cb_index, output_cb_data_format}}) @@ -664,7 +664,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor /** reader */ - KernelID unary_reader_kernel_id; + KernelHandle unary_reader_kernel_id; std::vector reader_ct_args = { (std::uint32_t) src0_cb_index }; @@ -677,7 +677,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor /** writer */ - KernelID unary_writer_kernel_id; + KernelHandle unary_writer_kernel_id; if (out_sharded) { vector writer_ct_args = { (uint32_t) output_cb_index, diff --git a/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp b/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp index b48e9902c56..382e4112b2a 100644 --- a/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp @@ -104,14 +104,14 @@ operation::ProgramWithCallbacks untilize_single_core(const Tensor &a, Tensor& ou }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); // Untilized writer - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_split_rows_interleaved.cpp", core, @@ -283,14 +283,14 @@ operation::ProgramWithCallbacks untilize_with_unpadding_single_core(const Tensor }; // Tilized reader - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); // Untilized writer - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_unpad_dims_split_rows.cpp", core, diff --git a/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp b/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp index e64b932a880..bc58ef916b7 100644 --- a/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp +++ b/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp @@ -261,7 +261,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i (std::uint32_t) src_cb_id }; - KernelID reader_kernel_id = CreateKernel( + KernelHandle reader_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, @@ -284,7 +284,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i (std::uint32_t) pc.pad_w, (std::uint32_t) pc.pad_h, }; - KernelID writer_kernel_id = CreateKernel( + KernelHandle writer_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo_s2.cpp", all_cores, @@ -300,7 +300,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i (uint32_t) nblocks_per_core, // per_core_block_cnt (uint32_t) ntiles_per_block, // per_block_ntiles }; - KernelID untilize_kernel_id = CreateKernel( + KernelHandle untilize_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/untilize/kernels/compute/untilize.cpp", all_cores, @@ -899,7 +899,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a (std::uint32_t) src_cb_id }; - KernelID reader_kernel_id = CreateKernel( + KernelHandle reader_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp", all_cores, @@ -922,7 +922,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a (std::uint32_t) pad_w, (std::uint32_t) pad_h, }; - KernelID writer_kernel_id = CreateKernel( + KernelHandle writer_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo.cpp", all_cores, @@ -938,7 +938,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a (uint32_t) nblocks_per_core, // per_core_block_cnt (uint32_t) ntiles_per_block, // per_block_ntiles }; - KernelID untilize_kernel_id = CreateKernel( + KernelHandle untilize_kernel_id = CreateKernel( program, "tt_eager/tt_dnn/op_library/untilize/kernels/compute/untilize.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp index d7157d0c181..3ea86119fa9 100644 --- a/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp @@ -55,13 +55,13 @@ operation::ProgramWithCallbacks fill_cache_multi_core(const Tensor& cache_tensor (std::uint32_t) dst_is_dram }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", all_cores, diff --git a/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp b/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp index 8f86415a57b..27f4a076a37 100644 --- a/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp +++ b/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp @@ -100,13 +100,13 @@ operation::ProgramWithCallbacks update_cache_single_core(const Tensor& cache_ten (std::uint32_t) interm2_cb_index }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_update_cache_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_update_cache_interleaved_start_id.cpp", core, @@ -230,13 +230,13 @@ operation::ProgramWithCallbacks fill_cache_single_core(const Tensor& cache_tenso (std::uint32_t) dst_is_dram }; - tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp", core, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args}); - tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel( + tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel( program, "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp", core, diff --git a/tt_metal/detail/program.hpp b/tt_metal/detail/program.hpp index f51e893b893..b8e3fb38a6b 100644 --- a/tt_metal/detail/program.hpp +++ b/tt_metal/detail/program.hpp @@ -13,16 +13,16 @@ using namespace tt::tt_metal; namespace tt::tt_metal::detail{ - inline KernelID AddKernel ( Program & program, Kernel * kernel) + inline KernelHandle AddKernel ( Program & program, Kernel * kernel) { return program.add_kernel(kernel); } - inline Kernel *GetKernel(const Program &program, KernelID kernel_id) { + inline Kernel *GetKernel(const Program &program, KernelHandle kernel_id) { return program.kernels_.at(kernel_id); } - inline std::shared_ptr GetCircularBuffer(const Program &program, CircularBufferID id) { + inline std::shared_ptr GetCircularBuffer(const Program &program, CBHandle id) { return program.get_circular_buffer(id); } diff --git a/tt_metal/detail/tt_metal.hpp b/tt_metal/detail/tt_metal.hpp index 3fa9f3e65a1..46627a31547 100644 --- a/tt_metal/detail/tt_metal.hpp +++ b/tt_metal/detail/tt_metal.hpp @@ -355,7 +355,7 @@ namespace tt::tt_metal{ bool riscv0_in_use = false; bool riscv1_in_use = false; bool noc0_in_use = false; bool noc1_in_use = false; - auto set_global_and_local_noc_usage = [&](KernelID kernel_id, bool &local_noc0_usage, bool &local_noc1_usage) { + auto set_global_and_local_noc_usage = [&](KernelHandle kernel_id, bool &local_noc0_usage, bool &local_noc1_usage) { const auto kernel = detail::GetKernel(program, kernel_id); auto kernel_config = std::get(kernel->config()); auto noc_value = magic_enum::enum_integer(kernel_config.noc); diff --git a/tt_metal/host_api.hpp b/tt_metal/host_api.hpp index 3816a25a8db..df2bf03673e 100644 --- a/tt_metal/host_api.hpp +++ b/tt_metal/host_api.hpp @@ -82,7 +82,7 @@ Program CreateProgram(); * | core_spec | Either a single logical core, a range of logical cores or a set of logical core ranges that indicate which cores kernel is placed on | const std::variant & | | Yes | * | config | Config for data movement or compute kernel | const std::variant & | | No | */ -KernelID CreateKernel(Program &program, const std::string &file_name, const std::variant &core_spec, const std::variant & config); +KernelHandle CreateKernel(Program &program, const std::string &file_name, const std::variant &core_spec, const std::variant & config); // ================================================== // HOST API: buffers @@ -103,7 +103,7 @@ KernelID CreateKernel(Program &program, const std::string &file_name, const std: * | core_spec | Either a single logical core, a range of logical cores or a set of logical core ranges that indicate where the circular buffer will be configured | const std::variant & | | Yes | * | config | Config for circular buffer | const CircularBufferConfig & | | Yes | */ -CircularBufferID CreateCircularBuffer(Program &program, const std::variant &core_spec, const CircularBufferConfig &config); +CBHandle CreateCircularBuffer(Program &program, const std::variant &core_spec, const CircularBufferConfig &config); /** * Gets a reference to the config owned by circular buffer at the given circular buffer ID. @@ -113,9 +113,9 @@ CircularBufferID CreateCircularBuffer(Program &program, const std::variant & | Any logical Tensix core coordinate(s) on which the kernel is placed | Yes | * | runtime_args | The runtime args to be written | const std::vector & | | Yes | */ -void SetRuntimeArgs(const Program &program, KernelID kernel, const std::variant &core_spec, const std::vector &runtime_args); +void SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::variant &core_spec, const std::vector &runtime_args); /** * Set multiple runtime arguments of a kernel at once during runtime, each mapping to a specific core. The runtime args for each core may be unique. @@ -222,11 +222,11 @@ void SetRuntimeArgs(const Program &program, KernelID kernel, const std::variant< * | Argument | Description | Type | Valid Range | Required | * |--------------|------------------------------------------------------------------------|--------------------------------------------------------|----------------------------------------------------------------------------|----------| * | program | The program containing kernels, circular buffers, semaphores | const Program & | | Yes | - * | kernel_id | ID of the kernel that will receive the runtime args | KernelID (uint64_t) | | Yes | + * | kernel_id | ID of the kernel that will receive the runtime args | KernelHandle (uint64_t) | | Yes | * | core_spec | Location of Tensix core(s) where the runtime args will be written | const std::vector & | Any set of logical Tensix core coordinates on which the kernel is placed | Yes | * | runtime_args | The runtime args to be written | const std::vector< vector > & | outer vector size must be equal to size of core_spec vector | Yes | */ -void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector > &runtime_args); +void SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector > &runtime_args); /** * Get the runtime args for a kernel. @@ -236,10 +236,10 @@ void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< * | Argument | Description | Type | Valid Range | Required | * |--------------|------------------------------------------------------------------------|-------------------------------|------------------------------------|----------| * | program | The program containing kernels, circular buffers, semaphores | const Program & | | Yes | - * | kernel_id | ID of the kernel that will receive the runtime args | KernelID (uint64_t) | | Yes | + * | kernel_id | ID of the kernel that will receive the runtime args | KernelHandle (uint64_t) | | Yes | * | logical_core | The location of the Tensix core where the runtime args will be written | const CoreCoord & | Any logical Tensix core coordinate | Yes | */ -std::vector& GetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreCoord &logical_core); +std::vector& GetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreCoord &logical_core); /** * Reads a buffer from the device diff --git a/tt_metal/impl/buffers/circular_buffer.hpp b/tt_metal/impl/buffers/circular_buffer.hpp index bc6f53f1ba6..672c53a9612 100644 --- a/tt_metal/impl/buffers/circular_buffer.hpp +++ b/tt_metal/impl/buffers/circular_buffer.hpp @@ -17,7 +17,7 @@ class CircularBuffer { public: CircularBuffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config); - const CircularBufferID id() const { return id_; } + const CBHandle id() const { return id_; } const CoreRangeSet &core_ranges() const { return core_ranges_; } diff --git a/tt_metal/impl/buffers/circular_buffer_types.hpp b/tt_metal/impl/buffers/circular_buffer_types.hpp index ab78763d3df..8bd384c0231 100644 --- a/tt_metal/impl/buffers/circular_buffer_types.hpp +++ b/tt_metal/impl/buffers/circular_buffer_types.hpp @@ -17,7 +17,7 @@ namespace tt::tt_metal { -using CircularBufferID = uintptr_t; +using CBHandle = uintptr_t; class CircularBufferConfig { public: diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp index 762c5af5b9f..da7d199d963 100644 --- a/tt_metal/impl/dispatch/command_queue.cpp +++ b/tt_metal/impl/dispatch/command_queue.cpp @@ -164,14 +164,14 @@ ProgramMap ConstructProgramMap(const Device* device, Program& program) { // which use multiple core ranges bool linked = dst_noc_multicast_info.size() == 1; - vector kernel_ids; + vector kernel_ids; if (kg.riscv0_id) kernel_ids.push_back(kg.riscv0_id.value()); if (kg.riscv1_id) kernel_ids.push_back(kg.riscv1_id.value()); if (kg.compute_id) kernel_ids.push_back(kg.compute_id.value()); uint32_t src_copy = src; for (size_t i = 0; i < kernel_ids.size(); i++) { - KernelID kernel_id = kernel_ids[i]; + KernelHandle kernel_id = kernel_ids[i]; vector sub_kernels; const Kernel* kernel = detail::GetKernel(program, kernel_id); if (kernel->processor() == RISCV::COMPUTE) { @@ -254,11 +254,11 @@ ProgramMap ConstructProgramMap(const Device* device, Program& program) { // Create a vector of all program binaries/cbs/semaphores uint32_t program_page_idx = 0; for (const KernelGroup &kg: program.get_kernel_groups()) { - vector kernel_ids; + vector kernel_ids; if (kg.riscv0_id) kernel_ids.push_back(kg.riscv0_id.value()); if (kg.riscv1_id) kernel_ids.push_back(kg.riscv1_id.value()); if (kg.compute_id) kernel_ids.push_back(kg.compute_id.value()); - for (KernelID kernel_id: kernel_ids) { + for (KernelHandle kernel_id: kernel_ids) { const Kernel* kernel = detail::GetKernel(program, kernel_id); for (const ll_api::memory& kernel_bin : kernel->binaries(device->id())) { diff --git a/tt_metal/impl/kernels/kernel_types.hpp b/tt_metal/impl/kernels/kernel_types.hpp index 0eb789fc4b6..8b702eca750 100644 --- a/tt_metal/impl/kernels/kernel_types.hpp +++ b/tt_metal/impl/kernels/kernel_types.hpp @@ -11,7 +11,7 @@ namespace tt::tt_metal { -using KernelID = std::uint16_t; +using KernelHandle = std::uint16_t; enum class DataMovementProcessor { RISCV_0 = 0, // BRISC diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp index b0a43ff6bf2..fc054ee9e6e 100644 --- a/tt_metal/impl/program/program.cpp +++ b/tt_metal/impl/program/program.cpp @@ -95,16 +95,16 @@ std::atomic Program::program_counter = 0; Program::Program(): id(program_counter++),worker_crs_({}), local_circular_buffer_allocation_needed_(false) {} -KernelID Program::add_kernel(Kernel *kernel) { +KernelHandle Program::add_kernel(Kernel *kernel) { this->invalidate_compile(); - KernelID id = kernels_.size(); + KernelHandle id = kernels_.size(); kernels_.push_back(kernel); kernel_groups_.resize(0); core_to_kernel_group_index_table_.clear(); return id; } -Kernel *Program::get_kernel(KernelID kernel_id) const { +Kernel *Program::get_kernel(KernelHandle kernel_id) const { //TT_ASSERT(kernel_id < this->kernels_.size(), "Expected Kernel with ID {} to be in Program {}", kernel_id, this->id); return this->kernels_.at(kernel_id); } @@ -114,10 +114,10 @@ KernelGroup::KernelGroup() : core_ranges({}) { KernelGroup::KernelGroup( const Program &program, - std::optional brisc_id, - std::optional ncrisc_id, - std::optional trisc_id, - std::optional erisc_id, + std::optional brisc_id, + std::optional ncrisc_id, + std::optional trisc_id, + std::optional erisc_id, int last_cb_index, const CoreRangeSet &new_ranges) : core_ranges({}) { @@ -187,26 +187,26 @@ KernelGroup * Program::kernels_on_core(const CoreCoord &core) { struct KernelGroupInt { bool valid; - std::optional trisc_id = std::nullopt; - std::optional brisc_id = std::nullopt; - std::optional ncrisc_id = std::nullopt; - std::optional erisc_id = std::nullopt; + std::optional trisc_id = std::nullopt; + std::optional brisc_id = std::nullopt; + std::optional ncrisc_id = std::nullopt; + std::optional erisc_id = std::nullopt; bool operator==(const KernelGroupInt& b) const; void update(Kernel* kernel, size_t kernel_idx) { RISCV riscv_processor = kernel->processor(); switch (riscv_processor) { case RISCV::BRISC: - this->brisc_id = static_cast(kernel_idx); + this->brisc_id = static_cast(kernel_idx); break; case RISCV::NCRISC: - this->ncrisc_id = static_cast(kernel_idx); + this->ncrisc_id = static_cast(kernel_idx); break; case RISCV::COMPUTE: - this->trisc_id = static_cast(kernel_idx); + this->trisc_id = static_cast(kernel_idx); break; case RISCV::ERISC: - this->erisc_id = static_cast(kernel_idx); + this->erisc_id = static_cast(kernel_idx); break; default: TT_ASSERT(false, "Unsupported kernel processor!"); @@ -337,7 +337,7 @@ void Program::CircularBufferAllocator::mark_address(uint64_t address, uint64_t s } } -CircularBufferID Program::add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config) { +CBHandle Program::add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config) { this->invalidate_compile(); std::shared_ptr circular_buffer = std::make_shared(core_range_set, config); // Globally allocated circular buffer do not invalidate allocation because their addresses are tracked by memory allocator @@ -378,7 +378,7 @@ CircularBufferID Program::add_circular_buffer(const CoreRangeSet &core_range_set return circular_buffer->id(); } -std::shared_ptr Program::get_circular_buffer(CircularBufferID cb_id) const { +std::shared_ptr Program::get_circular_buffer(CBHandle cb_id) const { if (this->circular_buffer_by_id_.find(cb_id) == this->circular_buffer_by_id_.end()) { TT_THROW("No circular buffer with id {} exists in Program {}", cb_id, this->id); } diff --git a/tt_metal/impl/program/program.hpp b/tt_metal/impl/program/program.hpp index d22ee2745fb..8fbe26330c5 100644 --- a/tt_metal/impl/program/program.hpp +++ b/tt_metal/impl/program/program.hpp @@ -24,26 +24,26 @@ namespace tt_metal { // Fwd declares namespace detail{ void ValidateCircularBufferRegion(const Program &program, const Device *device); - KernelID AddKernel ( Program & program, Kernel * kernel); - Kernel *GetKernel(const Program &program, KernelID kernel_id); - std::shared_ptr GetCircularBuffer(const Program &program, CircularBufferID id); + KernelHandle AddKernel ( Program & program, Kernel * kernel); + Kernel *GetKernel(const Program &program, KernelHandle kernel_id); + std::shared_ptr GetCircularBuffer(const Program &program, CBHandle id); } struct KernelGroup { CoreRangeSet core_ranges; - std::optional compute_id = std::nullopt; - std::optional riscv0_id = std::nullopt; - std::optional riscv1_id = std::nullopt; - std::optional erisc_id = std::nullopt; + std::optional compute_id = std::nullopt; + std::optional riscv0_id = std::nullopt; + std::optional riscv1_id = std::nullopt; + std::optional erisc_id = std::nullopt; launch_msg_t launch_msg; KernelGroup(); KernelGroup( const Program &program, - std::optional brisc_id, - std::optional ncrisc_id, - std::optional trisc_id, - std::optional erisc_id, + std::optional brisc_id, + std::optional ncrisc_id, + std::optional trisc_id, + std::optional erisc_id, int last_cb_index, const CoreRangeSet &new_ranges); }; @@ -133,7 +133,7 @@ class Program { CoreCoord grid_extent_; std::vector> circular_buffers_; - std::unordered_map> circular_buffer_by_id_; + std::unordered_map> circular_buffer_by_id_; // Tracks which circular buffer indices are being used std::unordered_map> per_core_cb_indices_; // Used to generate circular buffer addresses. There is one CircularBufferAllocator per unique CoreRange @@ -149,19 +149,19 @@ class Program { std::vector kernel_groups_; std::vector core_to_kernel_group_index_table_; - friend CircularBufferID CreateCircularBuffer(Program &program, const std::variant &core_spec, const CircularBufferConfig &config); - friend std::shared_ptr detail::GetCircularBuffer(const Program &program, CircularBufferID id); + friend CBHandle CreateCircularBuffer(Program &program, const std::variant &core_spec, const CircularBufferConfig &config); + friend std::shared_ptr detail::GetCircularBuffer(const Program &program, CBHandle id); friend void detail::ValidateCircularBufferRegion(const Program &program, const Device *device); - friend KernelID detail::AddKernel(Program &program, Kernel *kernel); - friend Kernel *detail::GetKernel(const Program &program, KernelID kernel_id); + friend KernelHandle detail::AddKernel(Program &program, Kernel *kernel); + friend Kernel *detail::GetKernel(const Program &program, KernelHandle kernel_id); friend uint32_t CreateSemaphore(Program &program, const std::variant &core_spec, uint32_t initial_value); - KernelID add_kernel(Kernel *kernel); - Kernel *get_kernel(KernelID kernel_id) const; + KernelHandle add_kernel(Kernel *kernel); + Kernel *get_kernel(KernelHandle kernel_id) const; - CircularBufferID add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config); - std::shared_ptr get_circular_buffer(CircularBufferID cb_id) const; + CBHandle add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config); + std::shared_ptr get_circular_buffer(CBHandle cb_id) const; void add_semaphore(const CoreRangeSet & crs, uint32_t address, uint32_t init_value); diff --git a/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp b/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp index 74bf27d3e92..810e181e996 100644 --- a/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp +++ b/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp @@ -92,28 +92,28 @@ int main(int argc, char **argv) { constexpr uint32_t src0_cb_index = CB::c_in0; constexpr uint32_t num_input_tiles = 2; CircularBufferConfig cb_src0_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src0_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src0_cb_index, single_tile_size); - CircularBufferID cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); + CBHandle cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); constexpr uint32_t src1_cb_index = CB::c_in1; CircularBufferConfig cb_src1_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src1_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src1_cb_index, single_tile_size); - CircularBufferID cb_src1 = tt_metal::CreateCircularBuffer(program, core, cb_src1_config); + CBHandle cb_src1 = tt_metal::CreateCircularBuffer(program, core, cb_src1_config); constexpr uint32_t output_cb_index = CB::c_out0; constexpr uint32_t num_output_tiles = 2; CircularBufferConfig cb_output_config = CircularBufferConfig(num_output_tiles * single_tile_size, {{output_cb_index, tt::DataFormat::Float16_b}}).set_page_size(output_cb_index, single_tile_size); - CircularBufferID cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config); + CBHandle cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config); /* * Specify data movement kernels for reading/writing data to/from * DRAM. */ - KernelID binary_reader_kernel_id = CreateKernel( + KernelHandle binary_reader_kernel_id = CreateKernel( program, "tt_metal/kernels/dataflow/reader_binary_diff_lengths.cpp", core, DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default}); - KernelID unary_writer_kernel_id = CreateKernel( + KernelHandle unary_writer_kernel_id = CreateKernel( program, "tt_metal/kernels/dataflow/writer_unary.cpp", core, @@ -132,7 +132,7 @@ int main(int argc, char **argv) { * Use the add_tiles operation available in the eltwise_binary * compute kernel. */ - KernelID eltwise_binary_kernel_id = CreateKernel( + KernelHandle eltwise_binary_kernel_id = CreateKernel( program, "tt_metal/kernels/compute/eltwise_binary.cpp", core, diff --git a/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp b/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp index 562d5c26325..44ec4de8746 100644 --- a/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp +++ b/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp @@ -63,24 +63,24 @@ int main(int argc, char **argv) { constexpr uint32_t src0_cb_index = CB::c_in0; constexpr uint32_t num_input_tiles = 2; CircularBufferConfig cb_src0_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src0_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src0_cb_index, single_tile_size); - CircularBufferID cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); + CBHandle cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); constexpr uint32_t output_cb_index = CB::c_out0; constexpr uint32_t num_output_tiles = 2; CircularBufferConfig cb_output_config = CircularBufferConfig(num_output_tiles * single_tile_size, {{output_cb_index, tt::DataFormat::Float16_b}}).set_page_size(output_cb_index, single_tile_size); - CircularBufferID cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config); + CBHandle cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config); /* * Specify data movement kernels for reading/writing data to/from * DRAM. */ - KernelID unary_reader_kernel_id = CreateKernel( + KernelHandle unary_reader_kernel_id = CreateKernel( program, "tt_metal/kernels/dataflow/reader_unary.cpp", core, DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default}); - KernelID unary_writer_kernel_id = CreateKernel( + KernelHandle unary_writer_kernel_id = CreateKernel( program, "tt_metal/kernels/dataflow/writer_unary.cpp", core, @@ -105,7 +105,7 @@ int main(int argc, char **argv) { {"SFPU_OP_CHAIN_0", "exp_tile_init(); exp_tile(0);"} }; - KernelID eltwise_sfpu_kernel_id = CreateKernel( + KernelHandle eltwise_sfpu_kernel_id = CreateKernel( program, "tt_metal/kernels/compute/eltwise_sfpu.cpp", core, diff --git a/tt_metal/programming_examples/loopback/loopback.cpp b/tt_metal/programming_examples/loopback/loopback.cpp index 225ff192e39..33de9f92008 100644 --- a/tt_metal/programming_examples/loopback/loopback.cpp +++ b/tt_metal/programming_examples/loopback/loopback.cpp @@ -39,7 +39,7 @@ int main(int argc, char **argv) { constexpr CoreCoord core = {0, 0}; - KernelID dram_copy_kernel_id = CreateKernel( + KernelHandle dram_copy_kernel_id = CreateKernel( program, "tt_metal/programming_examples/loopback/kernels/loopback_dram_copy.cpp", core, diff --git a/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp b/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp index ba2cedd9a44..5341bf54bb0 100644 --- a/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp +++ b/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp @@ -354,8 +354,8 @@ void matmul_multicore_reuse_mcast(vector& a, vector& b, vect /* * Kernels - Runtime arguments */ - std::vector reader_kernel_ids; - std::vector writer_kernel_ids; + std::vector reader_kernel_ids; + std::vector writer_kernel_ids; for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) { for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) { CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y}; diff --git a/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp b/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp index 2206088932d..3c2d5d70037 100644 --- a/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp +++ b/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp @@ -25,18 +25,18 @@ bool RunCustomCycle(tt_metal::Device *device, int loop_count, string run_name = {"LOOP_SIZE", std::to_string(loop_size)} }; - tt_metal::KernelID brisc_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel( program, "tt_metal/programming_examples/profiler/test_custom_cycle_count/kernels/custom_cycle_count.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .defines = kernel_defines}); - tt_metal::KernelID ncrisc_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle ncrisc_kernel = tt_metal::CreateKernel( program, "tt_metal/programming_examples/profiler/test_custom_cycle_count/kernels/custom_cycle_count.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .defines = kernel_defines}); vector trisc_kernel_args = {}; - tt_metal::KernelID trisc_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle trisc_kernel = tt_metal::CreateKernel( program, "tt_metal/programming_examples/profiler/test_custom_cycle_count/kernels/custom_cycle_count_compute.cpp", all_cores, tt_metal::ComputeConfig{.compile_args = trisc_kernel_args, .defines = kernel_defines} diff --git a/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp b/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp index 31e96736bc2..8a6b54589e1 100644 --- a/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp +++ b/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp @@ -25,18 +25,18 @@ bool RunCustomCycle(tt_metal::Device *device, int loop_count, string run_name = {"LOOP_SIZE", std::to_string(loop_size)} }; - tt_metal::KernelID brisc_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel( program, "tt_metal/programming_examples/profiler/test_full_buffer/kernels/full_buffer.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .defines = kernel_defines}); - tt_metal::KernelID ncrisc_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle ncrisc_kernel = tt_metal::CreateKernel( program, "tt_metal/programming_examples/profiler/test_full_buffer/kernels/full_buffer.cpp", all_cores, tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .defines = kernel_defines}); vector trisc_kernel_args = {}; - tt_metal::KernelID trisc_kernel = tt_metal::CreateKernel( + tt_metal::KernelHandle trisc_kernel = tt_metal::CreateKernel( program, "tt_metal/programming_examples/profiler/test_full_buffer/kernels/full_buffer_compute.cpp", all_cores, tt_metal::ComputeConfig{.compile_args = trisc_kernel_args, .defines = kernel_defines} diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp index 9eab23ad9c3..a5b679b7817 100644 --- a/tt_metal/tt_metal.cpp +++ b/tt_metal/tt_metal.cpp @@ -69,13 +69,13 @@ std::optional get_semaphore_address(const Program &program, const Core } -inline void SetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreCoord &c, const std::vector &runtime_args) +inline void SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreCoord &c, const std::vector &runtime_args) { detail::GetKernel(program, kernel_id)->set_runtime_args(c, runtime_args); } -inline void SetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreRange &core_range, const std::vector &runtime_args) +inline void SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreRange &core_range, const std::vector &runtime_args) { for (auto x = core_range.start.x; x <= core_range.end.x; x++) { for (auto y = core_range.start.y; y <= core_range.end.y; y++) { @@ -351,8 +351,8 @@ Program CreateProgram(){ return Program(); } -KernelID CreateKernel(Program &program, const std::string &file_name, const std::variant &core_spec, const std::variant &config) { - return std::visit( [&](auto&& cfg) -> KernelID +KernelHandle CreateKernel(Program &program, const std::string &file_name, const std::variant &core_spec, const std::variant &config) { + return std::visit( [&](auto&& cfg) -> KernelHandle { CoreRangeSet core_ranges = detail::GetCoreRangeSet(core_spec); Kernel * kernel; @@ -373,16 +373,16 @@ KernelID CreateKernel(Program &program, const std::string &file_name, const std: ); } -CircularBufferID CreateCircularBuffer(Program &program, const std::variant &core_spec, const CircularBufferConfig &config) { +CBHandle CreateCircularBuffer(Program &program, const std::variant &core_spec, const CircularBufferConfig &config) { CoreRangeSet core_ranges = detail::GetCoreRangeSet(core_spec); return program.add_circular_buffer(core_ranges, config); } -const CircularBufferConfig &GetCircularBufferConfig(Program &program, CircularBufferID cb_handle) { +const CircularBufferConfig &GetCircularBufferConfig(Program &program, CBHandle cb_handle) { return detail::GetCircularBuffer(program, cb_handle)->config(); } -void UpdateCircularBufferTotalSize(Program &program, CircularBufferID cb_handle, uint32_t total_size) { +void UpdateCircularBufferTotalSize(Program &program, CBHandle cb_handle, uint32_t total_size) { std::shared_ptr circular_buffer = detail::GetCircularBuffer(program, cb_handle); if (not circular_buffer->globally_allocated()) { program.invalidate_circular_buffer_allocation(); @@ -390,11 +390,11 @@ void UpdateCircularBufferTotalSize(Program &program, CircularBufferID cb_handle, circular_buffer->config().set_total_size(total_size); } -void UpdateCircularBufferPageSize(Program &program, CircularBufferID cb_handle, uint8_t buffer_index, uint32_t page_size) { +void UpdateCircularBufferPageSize(Program &program, CBHandle cb_handle, uint8_t buffer_index, uint32_t page_size) { detail::GetCircularBuffer(program, cb_handle)->config().set_page_size(buffer_index, page_size); } -void UpdateDynamicCircularBufferAddress(Program &program, CircularBufferID cb_handle, const Buffer &buffer) { +void UpdateDynamicCircularBufferAddress(Program &program, CBHandle cb_handle, const Buffer &buffer) { if (buffer.buffer_type() != BufferType::L1) { TT_FATAL("Only L1 buffers can have an associated circular buffer!"); } @@ -442,7 +442,7 @@ Buffer CreateBuffer(Device *device, std::uint64_t size, std::uint64_t page_size, void DeallocateBuffer(Buffer &buffer) { buffer.deallocate(); } -void SetRuntimeArgs(const Program &program, KernelID kernel_id, const std::variant &core_spec, const std::vector &runtime_args) { +void SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const std::variant &core_spec, const std::vector &runtime_args) { ZoneScoped; std::visit( [&](auto&& core_spec) @@ -461,7 +461,7 @@ void SetRuntimeArgs(const Program &program, KernelID kernel_id, const std::varia ); } -void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector > &runtime_args) +void SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector > &runtime_args) { ZoneScoped; TT_FATAL( core_spec.size() == runtime_args.size(), "Mistmatch between number of cores {} and number of runtime args {} getting updated", core_spec.size(), runtime_args.size()); @@ -470,7 +470,7 @@ void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< k->set_runtime_args(core_spec[i], runtime_args[i]); } -std::vector & GetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreCoord &logical_core) { +std::vector & GetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreCoord &logical_core) { return detail::GetKernel(program, kernel_id)->runtime_args(logical_core); }