From 882fe5f98dd61bd9aff700d38606d523d202ade9 Mon Sep 17 00:00:00 2001
From: Bill Teng <bteng@tenstorrent.com>
Date: Tue, 21 Nov 2023 23:12:54 +0000
Subject: [PATCH] #3939: rename KernelID -> KernelHandle and CircularBufferID
 -> CBHandle

---
 .../host_apis/runtime_args/runtime_args.rst   |  4 +-
 .../tt_metal/examples/dram_loopback.rst       |  2 +-
 .../tt_metal/examples/eltwise_binary.rst      |  8 ++--
 .../source/tt_metal/examples/eltwise_sfpu.rst |  6 +--
 .../matmul/matmul_global_l1.cpp               |  8 ++--
 tests/tt_metal/tt_metal/test_add_two_ints.cpp |  2 +-
 tests/tt_metal/tt_metal/test_compile_args.cpp |  4 +-
 .../test_datacopy_multi_core_multi_dram.cpp   |  6 +--
 .../test_matmul_multi_core_multi_dram.cpp     |  6 +--
 ...matmul_multi_core_multi_dram_in0_mcast.cpp |  8 ++--
 ...ti_core_multi_dram_in0_mcast_in1_mcast.cpp | 14 +++----
 ...matmul_multi_core_multi_dram_in1_mcast.cpp |  8 ++--
 .../test_matmul_multi_core_single_dram.cpp    |  2 +-
 .../tt_metal/test_multi_core_kernel.cpp       | 12 +++---
 .../tt_metal/test_multiple_programs.cpp       |  8 ++--
 .../tt_metal/unit_tests/basic/device.cpp      |  2 +-
 .../test_CircularBuffer_allocation.cpp        |  8 ++--
 .../command_queue/test_EnqueueProgram.cpp     |  2 +-
 .../dprint/test_print_all_harts.cpp           |  6 +--
 .../dprint/test_raise_wait.cpp                |  4 +-
 .../pipelining/basic_pipeline.cpp             |  4 +-
 .../multi_core_h/bcast_op_multi_core_h.cpp    |  4 +-
 .../multi_core_hw/bcast_op_multi_core_hw.cpp  |  4 +-
 .../multi_core_w/bcast_op_multi_core_w.cpp    |  4 +-
 .../single_core/bcast_op_single_core.cpp      |  4 +-
 ...op_multi_core_reuse_mcast_1d_optimized.cpp | 12 +++---
 ...op_multi_core_reuse_mcast_2d_optimized.cpp | 12 +++---
 ..._op_multi_core_reuse_mcast_generalized.cpp | 12 +++---
 .../bmm_op_multi_core_reuse_optimized.cpp     |  4 +-
 .../multi_core/concat_op_multi_core.cpp       |  4 +-
 .../single_core/concat_op_single_core.cpp     |  4 +-
 .../optimized_conv_op.cpp                     | 14 +++----
 .../optimized_conv_op_sharded.cpp             | 12 +++---
 .../copy/multi_core/copy_op_multi_core.cpp    |  4 +-
 .../copy/single_core/copy_op_single_core.cpp  |  4 +-
 .../op_library/downsample/downsample_op.cpp   |  2 +-
 .../eltwise_binary_op_multi_core.cpp          |  4 +-
 .../eltwise_binary_op_single_core.cpp         |  4 +-
 .../eltwise_unary_op_multi_core.cpp           |  4 +-
 .../eltwise_unary_op_single_core.cpp          |  4 +-
 .../op_library/embeddings/embeddings_op.cpp   |  2 +-
 .../tt_dnn/op_library/fill_rm/fill_rm_op.cpp  |  2 +-
 .../op_library/moreh_helper_functions.cpp     | 24 +++++------
 .../op_library/moreh_helper_functions.hpp     | 12 +++---
 .../multi_core/moreh_matmul_op_multi_core.cpp |  2 +-
 .../sum/moreh_sum_multi_core.cpp              |  2 +-
 .../multi_core/move_op_multi_core_overlap.cpp |  2 +-
 tt_eager/tt_dnn/op_library/pad/pad_op.cpp     | 12 +++---
 .../op_library/pad/pad_op_multi_core.cpp      |  4 +-
 .../op_library/pool/max_pool_multi_core.cpp   |  6 +--
 .../max_pool_multi_core_sharded_with_halo.cpp |  2 +-
 .../multi_core_h/reduce_op_multi_core_h.cpp   | 10 ++---
 .../multi_core_w/reduce_op_multi_core_w.cpp   |  4 +-
 .../single_core/reduce_op_single_core.cpp     |  4 +-
 .../tt_dnn/op_library/reshape/reshape_op.cpp  |  8 ++--
 .../rotary_embedding_op_multi_core.cpp        |  4 +-
 .../rotary_embedding_op_single_core.cpp       |  4 +-
 .../rotate_half_op_single_core.cpp            |  4 +-
 .../multi_core/sharded_op_multi_core.cpp      |  8 ++--
 .../tt_dnn/op_library/softmax/softmax_op.cpp  |  6 +--
 .../split/split_last_dim_two_chunks_tiled.cpp |  4 +-
 .../tilize_op_multi_core.cpp                  | 12 +++---
 .../tilize_op_single_core.cpp                 |  8 ++--
 .../cn_multi_core/transpose_cn_multi_core.cpp |  4 +-
 .../transpose_hc_op_multi_core.cpp            |  4 +-
 .../single_core/transpose_op_single_core.cpp  | 12 +++---
 .../transpose_wh_op_multi_core.cpp            |  4 +-
 .../unpad/multi_core/unpad_op_multi_core.cpp  |  8 ++--
 .../single_core/unpad_op_single_core.cpp      |  8 ++--
 .../multi_core/untilize_op_multi_core.cpp     | 10 ++---
 .../single_core/untilize_op_single_core.cpp   |  8 ++--
 .../untilize/untilize_with_halo_op.cpp        | 12 +++---
 .../multi_core/update_cache_op_multi_core.cpp |  4 +-
 .../update_cache_op_single_core.cpp           |  8 ++--
 tt_metal/detail/program.hpp                   |  6 +--
 tt_metal/detail/tt_metal.hpp                  |  2 +-
 tt_metal/host_api.hpp                         | 32 +++++++--------
 tt_metal/impl/buffers/circular_buffer.hpp     |  2 +-
 .../impl/buffers/circular_buffer_types.hpp    |  2 +-
 tt_metal/impl/dispatch/command_queue.cpp      |  8 ++--
 tt_metal/impl/kernels/kernel_types.hpp        |  2 +-
 tt_metal/impl/program/program.cpp             | 34 ++++++++--------
 tt_metal/impl/program/program.hpp             | 40 +++++++++----------
 .../eltwise_binary/eltwise_binary.cpp         | 12 +++---
 .../eltwise_sfpu/eltwise_sfpu.cpp             | 10 ++---
 .../loopback/loopback.cpp                     |  2 +-
 .../matmul_multicore_reuse_mcast.cpp          |  4 +-
 .../test_custom_cycle_count.cpp               |  6 +--
 .../test_full_buffer/test_full_buffer.cpp     |  6 +--
 tt_metal/tt_metal.cpp                         | 24 +++++------
 90 files changed, 330 insertions(+), 330 deletions(-)

diff --git a/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst b/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst
index 89427579ae3..7578c5964e9 100644
--- a/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst
+++ b/docs/source/tt_metal/apis/host_apis/runtime_args/runtime_args.rst
@@ -1,8 +1,8 @@
 Runtime Arguments
 ==================
 
-.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelID kernel_id, const std::variant<CoreCoord,CoreRange,CoreRangeSet> &logical_core, const std::vector<uint32_t> &runtime_args)
+.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const std::variant<CoreCoord,CoreRange,CoreRangeSet> &logical_core, const std::vector<uint32_t> &runtime_args)
 
-.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector<uint32_t> > &runtime_args)
+.. doxygenfunction:: SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector<uint32_t> > &runtime_args)
 
 .. doxygenfunction:: GetRuntimeArgs
diff --git a/docs/source/tt_metal/examples/dram_loopback.rst b/docs/source/tt_metal/examples/dram_loopback.rst
index a1b49adfd9d..68532cc1668 100644
--- a/docs/source/tt_metal/examples/dram_loopback.rst
+++ b/docs/source/tt_metal/examples/dram_loopback.rst
@@ -50,7 +50,7 @@ We will be using the accelerator core with coordinates ``{0, 0}``.
 
     constexpr CoreCoord core = {0, 0};
 
-    KernelID dram_copy_kernel_id = CreateKernel(
+    KernelHandle dram_copy_kernel_id = CreateKernel(
         program,
         "tt_metal/programming_examples/loopback/kernels/loopback_dram_copy.cpp",
         core,
diff --git a/docs/source/tt_metal/examples/eltwise_binary.rst b/docs/source/tt_metal/examples/eltwise_binary.rst
index 06d70314aef..abda3b25e99 100644
--- a/docs/source/tt_metal/examples/eltwise_binary.rst
+++ b/docs/source/tt_metal/examples/eltwise_binary.rst
@@ -26,19 +26,19 @@ We already have set the circular buffers needed for compute data communication.
   constexpr uint32_t num_input_tiles = 2;
   constexpr uint32_t input_cb_size = num_input_tiles * single_tile_size;
   CircularBufferConfig cb_src0_config = CircularBufferConfig(input_cb_size, {{src0_cb_index, tt::DataFormat::Float16_b}}, src0_cb_addr).set_page_size(src0_cb_index, single_tile_size);
-  CircularBufferID cb_src0 = CreateCircularBuffer(program, core, cb_src0_config);
+  CBHandle cb_src0 = CreateCircularBuffer(program, core, cb_src0_config);
 
   constexpr uint32_t src1_cb_index = CB::c_in1;
   constexpr uint32_t src1_cb_addr = 300 * 1024;
   CircularBufferConfig cb_src1_config = CircularBufferConfig(input_cb_size, {{src1_cb_index, tt::DataFormat::Float16_b}}, src1_cb_addr).set_page_size(src1_cb_index, single_tile_size);
-  CircularBufferID cb_src1 = CreateCircularBuffer(program, core, cb_src1_config);
+  CBHandle cb_src1 = CreateCircularBuffer(program, core, cb_src1_config);
 
   constexpr uint32_t output_cb_index = CB::c_out0;
   constexpr uint32_t output_cb_addr = 400 * 1024;
   constexpr uint32_t num_output_tiles = 2;
   constexpr uint32_t input_cb_size = num_input_tiles * single_tile_size;
   CircularBufferConfig cb_output_config = CircularBufferConfig(input_cb_size, {{output_cb_index, tt::DataFormat::Float16_b}}, output_cb_addr).set_page_size(output_cb_index, single_tile_size);
-  CircularBufferID cb_output = CreateCircularBuffer(program, core, cb_output);
+  CBHandle cb_output = CreateCircularBuffer(program, core, cb_output);
 
 We will create two input circular buffers to accommodate our two input tensors,
 and an output one for the result of the eltwise binary operation.
@@ -48,7 +48,7 @@ Compute kernel declaration and compile-time defines
 
 .. code-block:: cpp
 
-    KernelID eltwise_binary_kernel_id = CreateKernel(
+    KernelHandle eltwise_binary_kernel_id = CreateKernel(
         program,
         "tt_metal/kernels/compute/eltwise_binary.cpp",
         core,
diff --git a/docs/source/tt_metal/examples/eltwise_sfpu.rst b/docs/source/tt_metal/examples/eltwise_sfpu.rst
index 9099b9f5e40..db40eb0e63d 100644
--- a/docs/source/tt_metal/examples/eltwise_sfpu.rst
+++ b/docs/source/tt_metal/examples/eltwise_sfpu.rst
@@ -23,12 +23,12 @@ compute, and writer engines.
     constexpr uint32_t src0_cb_index = CB::c_in0;
     constexpr uint32_t num_input_tiles = 2;
     CircularBufferConfig cb_src0_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src0_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src0_cb_index, single_tile_size);
-    CircularBufferID cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);
+    CBHandle cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);
 
     constexpr uint32_t output_cb_index = CB::c_out0;
     constexpr uint32_t num_output_tiles = 2;
     CircularBufferConfig cb_output_config = CircularBufferConfig(num_output_tiles * single_tile_size, {{output_cb_index, tt::DataFormat::Float16_b}}).set_page_size(output_cb_index, single_tile_size);
-    CircularBufferID cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config);
+    CBHandle cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config);
 
 We will create one input circular buffers to accommodate our input tensor,
 and an output one for the result of the eltwise sfpu operation.
@@ -59,7 +59,7 @@ Compute kernel declaration and compile-time defines
         {"SFPU_OP_CHAIN_0", "exp_tile_init(); exp_tile(0);"}
     };
 
-    KernelID eltwise_sfpu_kernel_id = CreateKernel(
+    KernelHandle eltwise_sfpu_kernel_id = CreateKernel(
         program,
         "tt_metal/kernels/compute/eltwise_sfpu.cpp",
         core,
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp
index 2901feeec4b..3eb8f994a6c 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/matmul/matmul_global_l1.cpp
@@ -400,8 +400,8 @@ tt_metal::Program create_program_mcast_in0_in1(
           .noc = tt_metal::NOC::RISCV_0_default,
           .compile_args = in0_receiver_compile_time_args});
 
-  KernelID mm_kernel_in1_receiver_writer_other_noc_setup_id = 0;
-  KernelID mm_kernel_in0_receiver_other_noc_setup_id = 0;
+  KernelHandle mm_kernel_in1_receiver_writer_other_noc_setup_id = 0;
+  KernelHandle mm_kernel_in0_receiver_other_noc_setup_id = 0;
 
   if (split_half) {
     mm_kernel_in1_receiver_writer_other_noc_setup_id =
@@ -586,8 +586,8 @@ tt_metal::Program create_program_mcast_in0_in1(
       (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) *
       (per_core_N * out_subblock_h);
 
-  std::vector<KernelID> reader_kernel_ids;
-  std::vector<KernelID> writer_kernel_ids;
+  std::vector<KernelHandle> reader_kernel_ids;
+  std::vector<KernelHandle> writer_kernel_ids;
   for (int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) {
     for (int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) {
       CoreCoord core = {(std::size_t)start_core_x + core_idx_x,
diff --git a/tests/tt_metal/tt_metal/test_add_two_ints.cpp b/tests/tt_metal/tt_metal/test_add_two_ints.cpp
index 20d652f2e5e..d7857904fe9 100644
--- a/tests/tt_metal/tt_metal/test_add_two_ints.cpp
+++ b/tests/tt_metal/tt_metal/test_add_two_ints.cpp
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
         std::vector<uint32_t> first_runtime_args = {101, 202};
         std::vector<uint32_t> second_runtime_args = {303, 606};
 
-        tt_metal::KernelID add_two_ints_kernel = tt_metal::CreateKernel(
+        tt_metal::KernelHandle add_two_ints_kernel = tt_metal::CreateKernel(
             program, "tests/tt_metal/tt_metal/test_kernels/misc/add_two_ints.cpp", core,
             tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default});
 
diff --git a/tests/tt_metal/tt_metal/test_compile_args.cpp b/tests/tt_metal/tt_metal/test_compile_args.cpp
index a61ca1982c3..b3bde88a8de 100644
--- a/tests/tt_metal/tt_metal/test_compile_args.cpp
+++ b/tests/tt_metal/tt_metal/test_compile_args.cpp
@@ -33,14 +33,14 @@ bool test_compile_args(std::vector<uint32_t> compile_args_vec, int device_id) {
 
     CoreCoord core = {0, 0};
 
-    tt_metal::KernelID unary_reader_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel = tt_metal::CreateKernel(
         program,
         "tests/tt_metal/tt_metal/test_kernels/dataflow/test_compile_args.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = compile_args_vec});
 
 
-    tt_metal::KernelID unary_writer_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel = tt_metal::CreateKernel(
         program, "tt_metal/kernels/dataflow/blank.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default});
diff --git a/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp b/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp
index 459e9f49679..f955069ad10 100644
--- a/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp
+++ b/tests/tt_metal/tt_metal/test_datacopy_multi_core_multi_dram.cpp
@@ -84,7 +84,7 @@ std::vector<bfloat16> select_columns(std::vector<bfloat16> data, int M, int K, i
     return result;
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID> create_program(
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle> create_program(
     tt_metal::Device *device,
     int num_cores_r,
     int num_cores_c,
@@ -171,8 +171,8 @@ bool write_runtime_args_to_device(
     tt_metal::Program &program,
     int num_cores_r,
     int num_cores_c,
-    tt_metal::KernelID reader_kernel,
-    tt_metal::KernelID writer_kernel,
+    tt_metal::KernelHandle reader_kernel,
+    tt_metal::KernelHandle writer_kernel,
     int tensor_num_tiles,
     int block_num_tiles,
     int Ht,
diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp
index 3471331f97a..984c24dfbbe 100644
--- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp
+++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram.cpp
@@ -86,7 +86,7 @@ std::vector<bfloat16> select_columns(std::vector<bfloat16> data, int M, int K, i
     return result;
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID> create_program(
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle> create_program(
     tt_metal::Device *device,
     int num_cores_r,
     int num_cores_c,
@@ -193,8 +193,8 @@ bool assign_runtime_args_to_program(
     tt_metal::Program &program,
     int num_cores_r,
     int num_cores_c,
-    tt_metal::KernelID mm_reader_kernel,
-    tt_metal::KernelID unary_writer_kernel,
+    tt_metal::KernelHandle mm_reader_kernel,
+    tt_metal::KernelHandle unary_writer_kernel,
     int M,
     int N,
     int K,
diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp
index b93740cb261..ef48a028825 100644
--- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp
+++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast.cpp
@@ -86,7 +86,7 @@ std::vector<bfloat16> select_columns(std::vector<bfloat16> data, int M, int K, i
     return result;
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID, tt_metal::KernelID> create_program(
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle, tt_metal::KernelHandle> create_program(
     tt_metal::Device *device,
     int start_core_x,
     int start_core_y,
@@ -216,9 +216,9 @@ bool write_runtime_args_to_device(
     int start_core_y,
     int num_cores_r,
     int num_cores_c,
-    tt_metal::KernelID mm_reader_kernel_sender,
-    tt_metal::KernelID mm_reader_kernel_receiver,
-    tt_metal::KernelID unary_writer_kernel,
+    tt_metal::KernelHandle mm_reader_kernel_sender,
+    tt_metal::KernelHandle mm_reader_kernel_receiver,
+    tt_metal::KernelHandle unary_writer_kernel,
     int M,
     int N,
     int K,
diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp
index 74693f349e6..a1015937ebf 100644
--- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp
+++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast.cpp
@@ -85,7 +85,7 @@ std::vector<bfloat16> select_columns(std::vector<bfloat16> data, int M, int K, i
     return result;
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID, tt_metal::KernelID, tt_metal::KernelID, tt_metal::KernelID, tt_metal::KernelID> create_program(
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle, tt_metal::KernelHandle, tt_metal::KernelHandle, tt_metal::KernelHandle, tt_metal::KernelHandle> create_program(
     tt_metal::Device *device,
     int start_core_x,
     int start_core_y,
@@ -250,12 +250,12 @@ bool write_runtime_args_to_device(
     int start_core_y,
     int num_cores_r,
     int num_cores_c,
-    tt_metal::KernelID mm_reader_kernel_in0_sender_in1_sender,
-    tt_metal::KernelID mm_reader_kernel_in0_sender_in1_receiver,
-    tt_metal::KernelID mm_reader_kernel_in0_receiver_in1_sender,
-    tt_metal::KernelID mm_reader_kernel_in0_receiver_in1_receiver,
-    tt_metal::KernelID unary_writer_kernel_noc0,
-    tt_metal::KernelID unary_writer_kernel_noc1,
+    tt_metal::KernelHandle mm_reader_kernel_in0_sender_in1_sender,
+    tt_metal::KernelHandle mm_reader_kernel_in0_sender_in1_receiver,
+    tt_metal::KernelHandle mm_reader_kernel_in0_receiver_in1_sender,
+    tt_metal::KernelHandle mm_reader_kernel_in0_receiver_in1_receiver,
+    tt_metal::KernelHandle unary_writer_kernel_noc0,
+    tt_metal::KernelHandle unary_writer_kernel_noc1,
     int M,
     int N,
     int K,
diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp
index eb67c7629fd..5bf60e880b2 100644
--- a/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp
+++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast.cpp
@@ -85,7 +85,7 @@ std::vector<bfloat16> select_columns(std::vector<bfloat16> data, int M, int K, i
     return result;
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID, tt_metal::KernelID> create_program(
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle, tt_metal::KernelHandle> create_program(
     tt_metal::Device *device,
     int start_core_x,
     int start_core_y,
@@ -216,9 +216,9 @@ bool write_runtime_args_to_device(
     int start_core_y,
     int num_cores_r,
     int num_cores_c,
-    tt_metal::KernelID mm_reader_kernel_sender,
-    tt_metal::KernelID mm_reader_kernel_receiver,
-    tt_metal::KernelID unary_writer_kernel,
+    tt_metal::KernelHandle mm_reader_kernel_sender,
+    tt_metal::KernelHandle mm_reader_kernel_receiver,
+    tt_metal::KernelHandle unary_writer_kernel,
     int M,
     int N,
     int K,
diff --git a/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp b/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp
index 809fe627582..7ea69d406a1 100644
--- a/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp
+++ b/tests/tt_metal/tt_metal/test_matmul_multi_core_single_dram.cpp
@@ -152,7 +152,7 @@ std::vector<bfloat16> select_columns(std::vector<bfloat16> data, int M, int K, i
     return result;
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelID , tt_metal::KernelID> create_program(tt_metal::Device *device, int num_cores_r, int num_cores_c, int per_core_M, int per_core_N, int K, int in0_block_w, int out_subblock_h, int out_subblock_w) {
+std::tuple<tt_metal::Program, tt_metal::KernelHandle , tt_metal::KernelHandle> create_program(tt_metal::Device *device, int num_cores_r, int num_cores_c, int per_core_M, int per_core_N, int K, int in0_block_w, int out_subblock_h, int out_subblock_w) {
     tt_metal::Program program = tt_metal::CreateProgram();
     uint32_t single_tile_size = 2 * 1024;
     uint32_t in0_block_tiles = per_core_M * in0_block_w;
diff --git a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp
index a8ef950d59e..4e710ec484c 100644
--- a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp
+++ b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp
@@ -18,7 +18,7 @@
 //////////////////////////////////////////////////////////////////////////////////////////
 using namespace tt;
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID> create_program(
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle> create_program(
     tt_metal::Device *device,
     uint32_t single_tile_size,
     const CoreRange &all_cores,
@@ -87,7 +87,7 @@ void compile_and_configure_program(
 
 }
 
-void set_rt_args(tt_metal::Program &program, tt_metal::KernelID kernel, const CoreRange &core_range, const std::vector<uint32_t> &rt_args) {
+void set_rt_args(tt_metal::Program &program, tt_metal::KernelHandle kernel, const CoreRange &core_range, const std::vector<uint32_t> &rt_args) {
     for (auto x = core_range.start.x; x <= core_range.end.x; x++) {
         for (auto y = core_range.start.y; y <= core_range.end.y; y++) {
             CoreCoord core = CoreCoord(x, y);
@@ -99,8 +99,8 @@ void set_rt_args(tt_metal::Program &program, tt_metal::KernelID kernel, const Co
 void write_same_runtime_args_to_device(
     tt_metal::Device *device,
     tt_metal::Program &program,
-    tt_metal::KernelID reader_kernel_id,
-    tt_metal::KernelID writer_kernel_id,
+    tt_metal::KernelHandle reader_kernel_id,
+    tt_metal::KernelHandle writer_kernel_id,
     const CoreRange &core_range,
     int32_t num_tiles,
     tt_metal::Buffer &src_dram_buffer,
@@ -129,8 +129,8 @@ void write_same_runtime_args_to_device(
 void write_unique_writer_runtime_args_to_device(
     tt_metal::Device *device,
     tt_metal::Program &program,
-    tt_metal::KernelID reader_kernel_id,
-    tt_metal::KernelID writer_kernel_id,
+    tt_metal::KernelHandle reader_kernel_id,
+    tt_metal::KernelHandle writer_kernel_id,
     const CoreRange &core_range,
     const CoreRangeSet &core_blocks,
     int32_t num_tiles,
diff --git a/tests/tt_metal/tt_metal/test_multiple_programs.cpp b/tests/tt_metal/tt_metal/test_multiple_programs.cpp
index 0559aee7f00..fade0d13dc9 100644
--- a/tests/tt_metal/tt_metal/test_multiple_programs.cpp
+++ b/tests/tt_metal/tt_metal/test_multiple_programs.cpp
@@ -35,7 +35,7 @@ std::map<string, string> get_defines(BinaryOpType::Enum op_type){
 }
 
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID> setup_program_one(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) {
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle> setup_program_one(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) {
     tt_metal::Program program = tt_metal::CreateProgram();
 
     uint32_t src0_cb_index = 0;
@@ -88,7 +88,7 @@ std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID> setup_prog
     return {std::move(program), binary_reader_kernel, unary_writer_kernel};
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID> setup_program_two(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) {
+std::tuple<tt_metal::Program, tt_metal::KernelHandle, tt_metal::KernelHandle> setup_program_two(tt_metal::Device *device, const CoreCoord &core, uint32_t single_tile_size) {
     tt_metal::Program program = tt_metal::CreateProgram();
 
     uint32_t src0_cb_index = 0;
@@ -143,8 +143,8 @@ std::tuple<tt_metal::Program, tt_metal::KernelID, tt_metal::KernelID> setup_prog
 void write_program_runtime_args_to_device(
     tt_metal::Device *device,
     tt_metal::Program &program,
-    tt_metal::KernelID reader_kernel_id,
-    tt_metal::KernelID writer_kernel_id,
+    tt_metal::KernelHandle reader_kernel_id,
+    tt_metal::KernelHandle writer_kernel_id,
     const CoreCoord &core,
     uint32_t num_tiles,
     tt_metal::Buffer &src0_dram_buffer,
diff --git a/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp b/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp
index 5e0e4a5ae22..5458fe3a3ab 100644
--- a/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests/basic/device.cpp
@@ -315,7 +315,7 @@ TEST_F(DeviceFixture, ValidateKernelDoesNotTargetHarvestedCores) {
         CoreCoord logical_target_core = CoreCoord({.x = 0, .y = 0});
         uint32_t intermediate_l1_addr = L1_UNRESERVED_BASE;
         uint32_t size_bytes = host_input.size() * sizeof(uint32_t);
-        tt_metal::KernelID kernel_id = tt_metal::CreateKernel(
+        tt_metal::KernelHandle kernel_id = tt_metal::CreateKernel(
             program,
             kernel_name,
             logical_target_core,
diff --git a/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp b/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp
index 0ba4f3959c7..a54cacb65d2 100644
--- a/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests/circular_buffer/test_CircularBuffer_allocation.cpp
@@ -180,7 +180,7 @@ TEST_F(DeviceFixture, TestValidUpdateCircularBufferSize) {
 
     const uint32_t core0_num_cbs = 2;
     std::map<CoreCoord, std::map<uint8_t, uint32_t>> golden_addresses_per_core;
-    std::vector<CircularBufferID> cb_ids;
+    std::vector<CBHandle> cb_ids;
     auto expected_cb_addr = L1_UNRESERVED_BASE;
     for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) {
         CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size);
@@ -213,7 +213,7 @@ TEST_F(DeviceFixture, TestInvalidUpdateCircularBufferSize) {
 
     const uint32_t core0_num_cbs = 2;
     std::map<CoreCoord, std::map<uint8_t, uint32_t>> golden_addresses_per_core;
-    std::vector<CircularBufferID> cb_ids;
+    std::vector<CBHandle> cb_ids;
     auto expected_cb_addr = L1_UNRESERVED_BASE;
     for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) {
         CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size);
@@ -246,7 +246,7 @@ TEST_F(DeviceFixture, TestUpdateCircularBufferAddress) {
 
     const uint32_t core0_num_cbs = 2;
     std::map<CoreCoord, std::map<uint8_t, uint32_t>> golden_addresses_per_core;
-    std::vector<CircularBufferID> cb_ids;
+    std::vector<CBHandle> cb_ids;
     auto expected_cb_addr = L1_UNRESERVED_BASE;
     for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) {
         CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size);
@@ -277,7 +277,7 @@ TEST_F(DeviceFixture, TestUpdateCircularBufferPageSize) {
     const uint32_t core0_num_cbs = 2;
     std::map<CoreCoord, std::map<uint8_t, uint32_t>> golden_addresses_per_core;
     std::map<CoreCoord, std::map<uint8_t, uint32_t>> golden_num_pages_per_core;
-    std::vector<CircularBufferID> cb_ids;
+    std::vector<CBHandle> cb_ids;
     auto expected_cb_addr = L1_UNRESERVED_BASE;
     for (uint32_t buffer_idx = 0; buffer_idx < core0_num_cbs; buffer_idx++) {
         CircularBufferConfig config1 = CircularBufferConfig(cb_config.page_size, {{buffer_idx, cb_config.data_format}}).set_page_size(buffer_idx, cb_config.page_size);
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp
index 2fd7feee4df..a597efb0353 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp
@@ -117,7 +117,7 @@ bool test_dummy_EnqueueProgram_with_cbs_update_size(Device* device, CommandQueue
     Program program;
 
 
-    std::vector<CircularBufferID> cb_ids;
+    std::vector<CBHandle> cb_ids;
     for (uint32_t i = 0; i < program_config.cb_config_vector.size(); i++) {
         uint32_t cb_id = program_config.cb_config_vector[i].cb_id;
         uint32_t cb_num_pages = program_config.cb_config_vector[i].num_pages;
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp
index 2abdcf2f2f0..3df8ef3245c 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_print_all_harts.cpp
@@ -35,19 +35,19 @@ TEST_F(CommandQueueWithDPrintFixture, TestPrintFromAllHarts) {
     // Three different kernels to mirror typical usage and some previously
     // failing test cases, although all three kernels simply print.
     constexpr CoreCoord core = {0, 0}; // Print on first core only
-    KernelID brisc_print_kernel_id = CreateKernel(
+    KernelHandle brisc_print_kernel_id = CreateKernel(
         program,
         "tests/tt_metal/tt_metal/test_kernels/misc/brisc_print.cpp",
         core,
         DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default}
     );
-    KernelID ncrisc_print_kernel_id = CreateKernel(
+    KernelHandle ncrisc_print_kernel_id = CreateKernel(
         program,
         "tests/tt_metal/tt_metal/test_kernels/misc/ncrisc_print.cpp",
         core,
         DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default}
     );
-    KernelID trisc_print_kernel_id = CreateKernel(
+    KernelHandle trisc_print_kernel_id = CreateKernel(
         program,
         "tests/tt_metal/tt_metal/test_kernels/misc/trisc_print.cpp",
         core,
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp
index fa0a8b8d74a..0af2eabccf8 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp
@@ -33,7 +33,7 @@ try{
 
     // Two kernels - one for brisc and one for ncrisc. Nothing for triscs in
     // this test.
-    KernelID brisc_kernel_id = CreateKernel(
+    KernelHandle brisc_kernel_id = CreateKernel(
         program,
         "tests/tt_metal/tt_metal/test_kernels/misc/dprint_raise_wait_brisc.cpp",
         CoreRange{
@@ -45,7 +45,7 @@ try{
             .noc = NOC::RISCV_0_default
         }
     );
-    KernelID ncrisc_kernel_id = CreateKernel(
+    KernelHandle ncrisc_kernel_id = CreateKernel(
         program,
         "tests/tt_metal/tt_metal/test_kernels/misc/dprint_raise_wait_ncrisc.cpp",
         CoreRange{
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp
index db348d1870f..6dbbf5f0882 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/pipelining/basic_pipeline.cpp
@@ -97,8 +97,8 @@ void create_and_run_row_pipeline(tt_metal::Device* device, const PipelineRowConf
     dst_noc_xy = dst_buffer.noc_coordinates();
 
     // create kernels
-    vector<tt_metal::KernelID> receiver_kernels;
-    vector<tt_metal::KernelID> sender_kernels;
+    vector<tt_metal::KernelHandle> receiver_kernels;
+    vector<tt_metal::KernelHandle> sender_kernels;
     for (int core_id = 0; core_id < num_cores; core_id++) {
         string receiver_kernel_name;
         if (core_id == 0) {
diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp
index e495173e616..3b02137b2ce 100644
--- a/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp
+++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp
@@ -85,13 +85,13 @@ operation::ProgramWithCallbacks bcast_multi_core_h(const Tensor &a, const Tensor
     bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
     std::vector<uint32_t> writer_compile_time_args = {(uint32_t)dst_is_dram};
 
-	KernelID binary_reader_kernel_id = tt_metal::CreateKernel(
+	KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel(
 		program,
 		reader_name,
 		all_device_cores,
 		tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-	KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+	KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
 		program,
 		"tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp",
 		all_device_cores,
diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp
index 805bb854898..86dd099f4fa 100644
--- a/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp
+++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp
@@ -95,13 +95,13 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso
 		reader_defines["BCAST_SCALAR"] = "1";
 		bcast_compute_defines["BCAST_SCALAR"] = "1";
 	}
-	KernelID binary_reader_kernel_id = tt_metal::CreateKernel(
+	KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel(
 		program,
 		reader_name,
 		all_device_cores,
 		tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines});
 
-	KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+	KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
 		program,
 		"tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
 		all_device_cores,
diff --git a/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp b/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp
index 72b60df9be5..8f9663b22dd 100644
--- a/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp
+++ b/tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp
@@ -86,13 +86,13 @@ operation::ProgramWithCallbacks bcast_multi_core_w(const Tensor &a, const Tensor
     bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
     std::vector<uint32_t> writer_compile_time_args = {(uint32_t)dst_is_dram};
 
-	KernelID binary_reader_kernel_id = tt_metal::CreateKernel(
+	KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel(
 		program,
 		reader_name,
 		all_device_cores,
 		tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-	KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+	KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
 		program,
 		"tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp",
 		all_device_cores,
diff --git a/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp b/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp
index 2abd6e622a5..4be64942e7b 100644
--- a/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/bcast/single_core/bcast_op_single_core.cpp
@@ -77,13 +77,13 @@ operation::ProgramWithCallbacks bcast_single_core(const Tensor &a, const Tensor
     };
 
     const char* reader_name = bcast_op_utils::get_reader_name(bcast_dim, BcastOpParallelizationStrategy::SINGLE_CORE);
-    KernelID binary_reader_kernel_id = tt_metal::CreateKernel(
+    KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         reader_name,
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp
index 542ed7d31a3..07c8a5b0928 100644
--- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp
+++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp
@@ -260,7 +260,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0(
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines});
 
 
-    KernelID mm_kernel_in0_receiver_id = 0;
+    KernelHandle mm_kernel_in0_receiver_id = 0;
     if (!in0_is_sharded) {
         mm_kernel_in0_receiver_id = tt_metal::CreateKernel(
             program,
@@ -320,7 +320,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0(
     auto cb_src1 = tt_metal::CreateCircularBuffer(program, all_cores, src1_cb_config);
 
     uint32_t src2_cb_index = 2;
-    CircularBufferID cb_src2 = 0;
+    CBHandle cb_src2 = 0;
     if (in0_is_sharded) {
         tt_metal::CircularBufferConfig src2_cb_config = tt_metal::CircularBufferConfig(in2_CB_size, {{src2_cb_index, in0_data_format}})
             .set_page_size(src2_cb_index, in0_single_tile_size).set_globally_allocated_address(*in0_buffer);
@@ -359,8 +359,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0(
     uint32_t last_block_padded_block_tiles_w_skip =  (out_subblock_w * out_subblock_h) * (per_core_N / out_subblock_w - last_block_num_nonzero_subblocks_w);
     uint32_t last_block_padded_block_tiles_h_skip = (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) * (per_core_N * out_subblock_h);
 
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
 
     std::vector<uint32_t> in0_mcast_noc_x;
     std::vector<uint32_t> in0_mcast_noc_y;
@@ -873,8 +873,8 @@ operation::ProgramWithCallbacks create_program_mcast_in1(
     uint32_t last_block_padded_block_tiles_w_skip =  (out_subblock_w * out_subblock_h) * (per_core_N / out_subblock_w - last_block_num_nonzero_subblocks_w);
     uint32_t last_block_padded_block_tiles_h_skip = (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) * (per_core_N * out_subblock_h);
 
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
     for(uint32_t i = 0; i < num_cores; i++) {
         uint32_t core_idx_x = i % num_cores_c;
         uint32_t core_idx_y = i / num_cores_c;
diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp
index 039981c7de0..d30c76d6869 100644
--- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp
+++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp
@@ -349,7 +349,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1(
         (CoreRangeSet) (std::set<CoreRange>) {in0_sender_in1_receiver, in0_receiver_in1_receiver_left_half},
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = in1_noc, .compile_args = in1_receiver_writer_compile_time_args, .defines = mm_kernel_in1_receiver_writer_defines});
 
-    KernelID mm_kernel_in0_receiver_id = 0;
+    KernelHandle mm_kernel_in0_receiver_id = 0;
     if (!in0_is_sharded) {
         mm_kernel_in0_receiver_id = tt_metal::CreateKernel(
             program,
@@ -359,8 +359,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1(
             tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = in0_noc, .compile_args = in0_receiver_compile_time_args});
     }
 
-    KernelID mm_kernel_in1_receiver_writer_other_noc_setup_id = mm_kernel_in1_receiver_writer_id;
-    KernelID mm_kernel_in0_receiver_other_noc_setup_id = mm_kernel_in0_receiver_id;
+    KernelHandle mm_kernel_in1_receiver_writer_other_noc_setup_id = mm_kernel_in1_receiver_writer_id;
+    KernelHandle mm_kernel_in0_receiver_other_noc_setup_id = mm_kernel_in0_receiver_id;
 
     if (split_half) {
         mm_kernel_in1_receiver_writer_other_noc_setup_id = tt_metal::CreateKernel(
@@ -427,7 +427,7 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1(
     auto cb_src1 = tt_metal::CreateCircularBuffer(program, all_cores, src1_cb_config);
 
     uint32_t src2_cb_index = 2;
-    CircularBufferID cb_src2 = 0;
+    CBHandle cb_src2 = 0;
     if (in0_is_sharded) {
         tt_metal::CircularBufferConfig src2_cb_config = tt_metal::CircularBufferConfig(in2_CB_size, {{src2_cb_index, in0_data_format}})
             .set_page_size(src2_cb_index, in0_single_tile_size).set_globally_allocated_address(*in0_buffer);
@@ -467,8 +467,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1(
     uint32_t last_block_padded_block_tiles_w_skip =  (out_subblock_w * out_subblock_h) * (per_core_N / out_subblock_w - last_block_num_nonzero_subblocks_w);
     uint32_t last_block_padded_block_tiles_h_skip = (per_core_M / out_subblock_h - last_block_num_nonzero_subblocks_h) * (per_core_N * out_subblock_h);
 
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
 
     uint32_t diff_start_coord;
     uint32_t diff_end_coord;
diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp
index 7a302a50c61..d9788814e2c 100644
--- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp
+++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_generalized/bmm_op_multi_core_reuse_mcast_generalized.cpp
@@ -187,8 +187,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1(
         .set_page_size(interm0_cb_index, single_tile_size);
     auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config);
 
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
     for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) {
         for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) {
             CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y};
@@ -473,8 +473,8 @@ operation::ProgramWithCallbacks create_program_mcast_in0(
         .set_page_size(interm0_cb_index, single_tile_size);
     auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config);
 
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
     for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) {
         for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) {
             CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y};
@@ -733,8 +733,8 @@ operation::ProgramWithCallbacks create_program_mcast_in1(
         .set_page_size(interm0_cb_index, single_tile_size);
     auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config);
 
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
     for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) {
         for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) {
             CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y};
diff --git a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp
index 21e082a683c..df23054ec94 100644
--- a/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp
+++ b/tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp
@@ -205,8 +205,8 @@ operation::ProgramWithCallbacks create_program(
         .set_page_size(interm0_cb_index, output_single_tile_size);
     auto cb_output = tt_metal::CreateCircularBuffer(program, CoreRangeSet({all_cores}), cb_output_config);
 
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
     for (uint32_t i = 0, num_blocks_written = 0; i < num_cores; i++){
         uint32_t core_idx_x = i / core_range.y;
         uint32_t core_idx_y = i % core_range.y;
diff --git a/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp
index 1659ab88e84..3c705e24448 100644
--- a/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp
@@ -95,13 +95,13 @@ operation::ProgramWithCallbacks concat_multi_core(const std::vector<Tensor> &inp
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_concat_interleaved_start_id.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp b/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp
index 8f39b7072af..5ec76ed4d76 100644
--- a/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/concat/single_core/concat_op_single_core.cpp
@@ -95,13 +95,13 @@ operation::ProgramWithCallbacks concat_single_core(const std::vector<Tensor> &in
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_concat_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp
index a1938b88d59..4f3c78e8378 100644
--- a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp
+++ b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp
@@ -37,7 +37,7 @@ const uint32_t untilize_mode_reblock_cb               = CB::c_intermed2;
 const uint32_t out0_cb                                = CB::c_out0;
 
 
-tuple<CircularBufferID, CircularBufferID> create_CBs(tt_metal::Program &program,
+tuple<CBHandle, CBHandle> create_CBs(tt_metal::Program &program,
                                 const Tensor& input,
                                 CoreRange core,
                                 uint32_t num_cb0_tiles,
@@ -68,8 +68,8 @@ tuple<CircularBufferID, CircularBufferID> create_CBs(tt_metal::Program &program,
 		.set_page_size(act_cb, act_tile_size);
     auto cb_act = tt_metal::CreateCircularBuffer(program, core, cb_act_config);
 
-    CircularBufferID cb_sharded_act = 0;
-    CircularBufferID cb_sharded_act_mcast_receiver = 0;
+    CBHandle cb_sharded_act = 0;
+    CBHandle cb_sharded_act_mcast_receiver = 0;
     if (input.is_sharded()) {
         uint32_t num_bytes_for_df = datum_size(act_df);
         auto shard_shape = input.shard_spec().value().shard_shape;
@@ -96,7 +96,7 @@ tuple<CircularBufferID, CircularBufferID> create_CBs(tt_metal::Program &program,
 		.set_page_size(tilize_mode_tilized_act_cb, tilized_act_tile_size);
     auto cb_src0_tilized = tt_metal::CreateCircularBuffer(program, core, cb_src0_tilized_config);
 
-    CircularBufferID cb_output = 0;
+    CBHandle cb_output = 0;
     if (untilize_out) {
         CircularBufferConfig cb_matmul_partials_config = CircularBufferConfig(num_output_tiles * out_tile_size, {{matmul_partials_cb, out_df}})
 		    .set_page_size(matmul_partials_cb, out_tile_size);
@@ -738,7 +738,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_(const Tensor& a, cons
         .compile_args = writer_compile_time_args,
         .defines = writer_mcast_sender_defines});
 
-    KernelID writer_mcast_receiver_id;
+    KernelHandle writer_mcast_receiver_id;
     if (total_num_cores > 1) {
         writer_mcast_receiver_id = CreateKernel(
         program,
@@ -779,8 +779,8 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_(const Tensor& a, cons
         noop_cores, ComputeConfig{});
     }
 
-    vector<KernelID> reader_ids;
-    vector<KernelID> writer_ids;
+    vector<KernelHandle> reader_ids;
+    vector<KernelHandle> writer_ids;
     //tt_start_debug_print_server();
     for(uint32_t core_i = 0; core_i < total_num_cores; core_i++) {
         uint32_t core_x_i = core_i % num_cores_x;
diff --git a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp
index 4dbb23eaa3a..253acdf78d2 100644
--- a/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp
+++ b/tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp
@@ -37,7 +37,7 @@ const uint32_t untilize_mode_reblock_cb               = CB::c_intermed2;
 const uint32_t out0_cb                                = CB::c_out0;
 
 
-tuple<CircularBufferID, CircularBufferID> create_CBs_for_sharded_input(
+tuple<CBHandle, CBHandle> create_CBs_for_sharded_input(
     tt_metal::Program &program,
     const Tensor& input,
     CoreRange core,
@@ -64,7 +64,7 @@ tuple<CircularBufferID, CircularBufferID> create_CBs_for_sharded_input(
     uint32_t tilized_act_tile_size = tt_metal::detail::TileSize(tilized_act_df);
     uint32_t out_tile_size = tt_metal::detail::TileSize(out_df);
 
-    CircularBufferID cb_sharded_act = 0;
+    CBHandle cb_sharded_act = 0;
     if (input.memory_config().is_sharded()) {
         uint32_t num_bytes_for_df = datum_size(act_df);
         auto shard_shape = input.shard_spec().value().shard_shape;
@@ -111,7 +111,7 @@ tuple<CircularBufferID, CircularBufferID> create_CBs_for_sharded_input(
 		.set_page_size(tilize_mode_tilized_act_cb, tilized_act_tile_size);
     auto cb_src0_tilized = tt_metal::CreateCircularBuffer(program, core, cb_src0_tilized_config);
 
-    CircularBufferID cb_output = 0;
+    CBHandle cb_output = 0;
     if (untilize_out) {
         CircularBufferConfig cb_matmul_partials_config = CircularBufferConfig(num_output_tiles * out_tile_size, {{matmul_partials_cb, out_df}})
 		    .set_page_size(matmul_partials_cb, out_tile_size);
@@ -782,7 +782,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_(const Tensor&
         .compile_args = writer_compile_time_args,
         .defines = writer_mcast_sender_defines});
 
-    KernelID writer_mcast_receiver_id;
+    KernelHandle writer_mcast_receiver_id;
     if (total_num_cores > 1) {
         writer_mcast_receiver_id = CreateKernel(
         program,
@@ -823,8 +823,8 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_(const Tensor&
         noop_cores, ComputeConfig{});
     }
 
-    vector<KernelID> reader_ids;
-    vector<KernelID> writer_ids;
+    vector<KernelHandle> reader_ids;
+    vector<KernelHandle> writer_ids;
     //tt_start_debug_print_server();
     for(uint32_t core_i = 0; core_i < total_num_cores; core_i++) {
         uint32_t core_x_i = core_i % num_cores_x;
diff --git a/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp
index a82ccfe33dc..cb1c4924d94 100644
--- a/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp
@@ -87,13 +87,13 @@ operation::ProgramWithCallbacks copy_multi_core(const Tensor &input, const Tenso
     if (backwards) {
         kernel_defines["BACKWARDS"] = "1";
     }
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         tilized ? "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_interleaved_start_id.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = kernel_defines});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         tilized ? "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp b/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp
index fa283f44d10..3a1d0115438 100644
--- a/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp
@@ -84,13 +84,13 @@ operation::ProgramWithCallbacks copy_single_core(const Tensor &input, const Tens
     if (backwards) {
         kernel_defines["BACKWARDS"] = "1";
     }
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         tilized ? "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = kernel_defines});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         tilized ? "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp b/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp
index d62e9e7a96a..cc0427e0e9d 100644
--- a/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp
+++ b/tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp
@@ -515,7 +515,7 @@ operation::ProgramWithCallbacks downsample_single_core(const Tensor &a, std::arr
     };
 
     // Writer to downsample - drops rows from untilized cb
-    tt_metal::KernelID downsample_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle downsample_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/downsample/kernels/downsample_writer_kernel.cpp",
         core_range,
diff --git a/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp
index 52af678e2d2..7311174c8b5 100644
--- a/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp
@@ -132,13 +132,13 @@ operation::ProgramWithCallbacks eltwise_binary_multi_core(const Tensor &a, const
         (std::uint32_t) dst_is_dram
     };
 
-    KernelID binary_reader_kernel_id = tt_metal::CreateKernel(
+    KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_binary_interleaved_start_id.cpp",
         all_device_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines});
 
-    KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         (block_sharded and not out_sharded) ? "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded_blocks_interleaved_start_id.cpp" : "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         all_device_cores,
diff --git a/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp
index 420b5e144e2..d0dd885d0cb 100644
--- a/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/eltwise_binary/single_core/eltwise_binary_op_single_core.cpp
@@ -79,13 +79,13 @@ operation::ProgramWithCallbacks eltwise_binary_single_core(const Tensor &a, cons
         (std::uint32_t) dst_is_dram
     };
 
-    KernelID binary_reader_kernel_id = tt_metal::CreateKernel(
+    KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_binary_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp
index d78b43dd2df..de41302a09a 100644
--- a/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp
@@ -56,13 +56,13 @@ operation::ProgramWithCallbacks eltwise_unary_multi_core(const Tensor &a, Tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp b/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp
index 2d95bbc5548..c060a9d9e72 100644
--- a/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/eltwise_unary/single_core/eltwise_unary_op_single_core.cpp
@@ -50,13 +50,13 @@ operation::ProgramWithCallbacks eltwise_unary_single_core(const Tensor &a, Tenso
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp b/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp
index 383e3289dc7..51dc25f9879 100644
--- a/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp
+++ b/tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp
@@ -388,7 +388,7 @@ operation::ProgramWithCallbacks embeddings_rm(
     std::vector<tt_metal::NOC> noc_ports = {tt_metal::NOC::RISCV_0_default, tt_metal::NOC::RISCV_1_default};
 
 
-    std::vector<tt::tt_metal::KernelID> kernIds(RISC_CORES_PER_TENSIX);
+    std::vector<tt::tt_metal::KernelHandle> kernIds(RISC_CORES_PER_TENSIX);
 
     for(int risc_id =0; risc_id < embedding_risc_cores_per_tensix; risc_id++){
        std::vector<uint32_t> embedding_compile_time_args= {  (std::uint32_t) in0_is_dram,
diff --git a/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp b/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp
index c7f4abf9cc9..8f503844582 100644
--- a/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp
+++ b/tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp
@@ -42,7 +42,7 @@ operation::ProgramWithCallbacks fill_rm_single_core(const Tensor& any, Tensor &o
     bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
     std::vector<uint32_t> reader_compile_time_args = {(std::uint32_t) dst_is_dram};
 
-    tt_metal::KernelID binary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle binary_reader_kernel_id = tt_metal::CreateKernel(
         program, "tt_eager/tt_dnn/kernels/dataflow/fill_rm_interleaved.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args=reader_compile_time_args});
diff --git a/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp b/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp
index f0169ca3912..962d81ca1f0 100644
--- a/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp
+++ b/tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp
@@ -83,7 +83,7 @@ std::tuple<uint32_t, CoreRangeSet, CoreRangeSet, CoreRangeSet, uint32_t, uint32_
         num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2);
 }
 
-KernelID CreateReadKernel(
+KernelHandle CreateReadKernel(
     Program &program,
     const std::string &file_name,
     const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec,
@@ -100,7 +100,7 @@ KernelID CreateReadKernel(
             .defines = defines});
 }
 
-KernelID CreateWriteKernel(
+KernelHandle CreateWriteKernel(
     Program &program,
     const std::string &file_name,
     const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec,
@@ -117,7 +117,7 @@ KernelID CreateWriteKernel(
             .defines = defines});
 }
 
-[[maybe_unused]] std::vector<KernelID> CreateComputeKernel(
+[[maybe_unused]] std::vector<KernelHandle> CreateComputeKernel(
     Program &program,
     const std::string &file_name,
     std::vector<ComputeKernelArg> args,
@@ -125,8 +125,8 @@ KernelID CreateWriteKernel(
     MathFidelity math_fidelity,
     bool fp32_dest_acc_en,
     bool math_approx_mode) {
-    std::vector<KernelID> compute_kernel_ids{};
-    KernelID compute_kernel_id{};
+    std::vector<KernelHandle> compute_kernel_ids{};
+    KernelHandle compute_kernel_id{};
     for (auto arg : args) {
         compute_kernel_id =
             CreateComputeKernel(program, file_name, arg, defines, math_fidelity, fp32_dest_acc_en, math_approx_mode);
@@ -135,7 +135,7 @@ KernelID CreateWriteKernel(
     return compute_kernel_ids;
 }
 
-[[maybe_unused]] KernelID CreateComputeKernel(
+[[maybe_unused]] KernelHandle CreateComputeKernel(
     Program &program,
     const std::string &file_name,
     ComputeKernelArg arg,
@@ -143,7 +143,7 @@ KernelID CreateWriteKernel(
     MathFidelity math_fidelity,
     bool fp32_dest_acc_en,
     bool math_approx_mode) {
-    KernelID compute_kernel_id{0};
+    KernelHandle compute_kernel_id{0};
     if (arg.num_tile_per_core_group > 0) {
         compute_kernel_id = CreateKernel(
             program,
@@ -159,10 +159,10 @@ KernelID CreateWriteKernel(
     return compute_kernel_id;
 }
 
-[[maybe_unused]] std::vector<CircularBufferID> CreateCircularBuffer(
+[[maybe_unused]] std::vector<CBHandle> CreateCircularBuffer(
     Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, std::vector<CircularBufferArg> args) {
-    std::vector<CircularBufferID> cb_ids{};
-    CircularBufferID cb_id{};
+    std::vector<CBHandle> cb_ids{};
+    CBHandle cb_id{};
     for (auto arg : args) {
         cb_id = CreateCircularBuffer(program, core_range, data_format, arg);
         cb_ids.push_back(cb_id);
@@ -170,9 +170,9 @@ KernelID CreateWriteKernel(
     return cb_ids;
 }
 
-[[maybe_unused]] CircularBufferID CreateCircularBuffer(
+[[maybe_unused]] CBHandle CreateCircularBuffer(
     Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, CircularBufferArg arg) {
-    CircularBufferID cb_id{0};
+    CBHandle cb_id{0};
     if (arg.num_tiles > 0) {
         auto _buffer_index = arg.buffer_index;
         auto _num_tiles = arg.num_tiles;
diff --git a/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp b/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp
index 1526210e956..262f784c7f0 100644
--- a/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp
+++ b/tt_eager/tt_dnn/op_library/moreh_helper_functions.hpp
@@ -55,14 +55,14 @@ std::tuple<CoreRangeSet, CoreRangeSet, CoreRangeSet> add_core_offset(
 std::tuple<uint32_t, CoreRangeSet, CoreRangeSet, CoreRangeSet, uint32_t, uint32_t> split_work_to_cores(
     CoreRange core_range, uint32_t units_to_divide);
 
-KernelID CreateReadKernel(
+KernelHandle CreateReadKernel(
     Program &program,
     const std::string &file_name,
     const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec,
     const std::vector<uint32_t> &compile_args,
     std::map<string, string> defines = {});
 
-KernelID CreateWriteKernel(
+KernelHandle CreateWriteKernel(
     Program &program,
     const std::string &file_name,
     const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec,
@@ -75,7 +75,7 @@ struct ComputeKernelArg {
     const std::vector<uint32_t> &compile_args;
 };
 
-[[maybe_unused]] std::vector<KernelID> CreateComputeKernel(
+[[maybe_unused]] std::vector<KernelHandle> CreateComputeKernel(
     Program &program,
     const std::string &file_name,
     std::vector<ComputeKernelArg> args,
@@ -84,7 +84,7 @@ struct ComputeKernelArg {
     bool fp32_dest_acc_en = false,
     bool math_approx_mode = false);
 
-[[maybe_unused]] KernelID CreateComputeKernel(
+[[maybe_unused]] KernelHandle CreateComputeKernel(
     Program &program,
     const std::string &file_name,
     ComputeKernelArg arg,
@@ -108,10 +108,10 @@ struct CircularBufferArg {
     }
 };
 
-[[maybe_unused]] std::vector<CircularBufferID> CreateCircularBuffer(
+[[maybe_unused]] std::vector<CBHandle> CreateCircularBuffer(
     Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, std::vector<CircularBufferArg> args);
 
-[[maybe_unused]] CircularBufferID CreateCircularBuffer(
+[[maybe_unused]] CBHandle CreateCircularBuffer(
     Program &program, const CoreRangeSet &core_range, tt::DataFormat data_format, CircularBufferArg arg);
 
 }  // namespace primary
diff --git a/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp
index d677673210e..0bf1d1eae73 100644
--- a/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp
@@ -156,7 +156,7 @@ operation::ProgramWithCallbacks moreh_matmul_multi_core(
     const auto compute_kernel_1_id = CreateComputeKernel(
         program, compute_kernel_file, {core_group_1, num_output_tiles_per_core_group_1, compute_args_group_1}, compute_defines);
 
-    std::optional<KernelID> compute_kernel_2_id = std::nullopt;
+    std::optional<KernelHandle> compute_kernel_2_id = std::nullopt;
     if (!core_group_2.ranges().empty()) {
         const std::vector<uint32_t> compute_args_group_2 = {
             1,                                  // B
diff --git a/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp b/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp
index bfac14700aa..80d00563fbd 100644
--- a/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/moreh_matmul_backward/sum/moreh_sum_multi_core.cpp
@@ -108,7 +108,7 @@ operation::ProgramWithCallbacks moreh_sum_multi_core(const Tensor &src, const Te
     const auto compute_kernel_1_id = CreateComputeKernel(
         program, compute_kernel_file, {core_group_1, num_cols_per_core_group_1, compute_args_group_1}, compute_defines);
 
-    std::optional<KernelID> compute_kernel_2_id = std::nullopt;
+    std::optional<KernelHandle> compute_kernel_2_id = std::nullopt;
     if (!core_group_2.ranges().empty()) {
         const std::vector<uint32_t> compute_args_group_2{num_cols_per_core_group_2};
         compute_kernel_2_id = CreateComputeKernel(
diff --git a/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp b/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp
index 53ef6f77555..78fc55824f3 100644
--- a/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp
+++ b/tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp
@@ -100,7 +100,7 @@ operation::ProgramWithCallbacks move_multi_core_with_overlap(const Tensor &input
         compile_time_args.push_back((uint32_t)page_size_is_power_of_two);
     }
 
-    KernelID kernel_id = CreateKernel(
+    KernelHandle kernel_id = CreateKernel(
         program,
         tilized ? "tt_eager/tt_dnn/op_library/move/kernels/move_interleaved_with_overlap.cpp" : "tt_eager/tt_dnn/op_library/move/kernels/move_stick_layout_interleaved_with_overlap.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/pad/pad_op.cpp b/tt_eager/tt_dnn/op_library/pad/pad_op.cpp
index ab946f79597..7dcaab8d40f 100644
--- a/tt_eager/tt_dnn/op_library/pad/pad_op.cpp
+++ b/tt_eager/tt_dnn/op_library/pad/pad_op.cpp
@@ -72,13 +72,13 @@ operation::ProgramWithCallbacks pad_rm_reader_writer(const Tensor &a,
     bfloat16 bfloat_zero = bfloat16(0.0f);
     uint32_t packed_pad_value = pack_two_bfloat16_into_uint32({bfloat_zero, bfloat_pad_value});
 
-    KernelID reader_kernel_id = CreateKernel(program,
+    KernelHandle reader_kernel_id = CreateKernel(program,
                                                         "tt_eager/tt_dnn/kernels/dataflow/reader_pad_dims_rm_interleaved.cpp",
                                                         cores,
                                                         DataMovementConfig{.processor = DataMovementProcessor::RISCV_1,
                                                                             .noc = NOC::RISCV_1_default,
                                                                             .compile_args = reader_ct_args});
-    KernelID writer_kernel_id = CreateKernel(program,
+    KernelHandle writer_kernel_id = CreateKernel(program,
                                                         "tt_eager/tt_dnn/kernels/dataflow/writer_pad_dims_rm_interleaved.cpp",
                                                         cores,
                                                         DataMovementConfig{.processor = DataMovementProcessor::RISCV_0,
@@ -218,7 +218,7 @@ operation::ProgramWithCallbacks pad_rm_opt(const Tensor &a,
     uint32_t packed_pad_value = pack_two_bfloat16_into_uint32({bfloat_zero, bfloat_pad_value});
 
     CoreRange core = {.start={0, 0}, .end={0, 0}};
-    KernelID reader_kernel_id = CreateKernel(program,
+    KernelHandle reader_kernel_id = CreateKernel(program,
                                                         "tt_eager/tt_dnn/kernels/dataflow/pad_dims_rm_interleaved_opt.cpp",
                                                         core,
                                                         DataMovementConfig{.processor = DataMovementProcessor::RISCV_1,
@@ -349,7 +349,7 @@ operation::ProgramWithCallbacks pad_rm(const Tensor &a, Tensor &output, const Sh
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/pad_dims_rm_interleaved.cpp",
         core,
@@ -464,13 +464,13 @@ operation::ProgramWithCallbacks pad_tile(const Tensor &a, Tensor& output, const
         (std::uint32_t) dst_is_dram
     };
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_pad_dims_interleaved.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp
index 315b237b0cb..600f2b207a7 100644
--- a/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp
@@ -191,13 +191,13 @@ operation::ProgramWithCallbacks pad_rm_reader_writer_multi_core(const Tensor &a,
     bfloat16 bfloat_zero = bfloat16(0.0f);
     uint32_t packed_pad_value = pack_two_bfloat16_into_uint32({bfloat_zero, bfloat_pad_value});
 
-    KernelID reader_kernel_id = CreateKernel(program,
+    KernelHandle reader_kernel_id = CreateKernel(program,
                                                         "tt_eager/tt_dnn/kernels/dataflow/reader_pad_dims_rm_interleaved.cpp",
                                                         all_cores,
                                                         DataMovementConfig{.processor = DataMovementProcessor::RISCV_1,
                                                                             .noc = NOC::RISCV_1_default,
                                                                             .compile_args = reader_ct_args});
-    KernelID writer_kernel_id = CreateKernel(program,
+    KernelHandle writer_kernel_id = CreateKernel(program,
                                                         "tt_eager/tt_dnn/kernels/dataflow/writer_pad_dims_rm_interleaved.cpp",
                                                         all_cores,
                                                         DataMovementConfig{.processor = DataMovementProcessor::RISCV_0,
diff --git a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp
index 7320ae93a9d..92bd844d7e0 100644
--- a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp
@@ -280,7 +280,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(const Tensor &inp
 		                                        .set_page_size(in_scalar_cb_id, in_scalar_cb_pagesize);
     auto in_scalar_cb = tt_metal::CreateCircularBuffer(program, all_cores, in_scalar_cb_config);
 
-    CircularBufferID raw_in_cb = 0;
+    CBHandle raw_in_cb = 0;
     if (input.memory_config().is_sharded()) {
         // incoming data is the input cb instead of raw l1/dram addr
         auto raw_in_cb_id = CB::c_in2;
@@ -319,7 +319,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(const Tensor &inp
 		.set_page_size(out_cb_id, out_cb_pagesize);
     auto cb_out = tt_metal::CreateCircularBuffer(program, all_cores, cb_out_config);
 
-    CircularBufferID cb_sharded_out = 0;
+    CBHandle cb_sharded_out = 0;
     if (output.memory_config().is_sharded()) {
         uint32_t sharded_out_cb_id = CB::c_out1;            // output rows in RM
 
@@ -1121,7 +1121,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T
 		.set_page_size(out_cb_id, out_cb_pagesize);
     auto cb_out = tt_metal::CreateCircularBuffer(program, all_cores, cb_out_config);
 
-    CircularBufferID cb_sharded_out = 0;
+    CBHandle cb_sharded_out = 0;
     if (output.memory_config().is_sharded()) {
         uint32_t sharded_out_cb_id = CB::c_out1;            // output rows in RM
 
diff --git a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp
index d934a62d2e2..832ac54de65 100644
--- a/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp
+++ b/tt_eager/tt_dnn/op_library/pool/max_pool_multi_core_sharded_with_halo.cpp
@@ -319,7 +319,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo(const T
 		.set_page_size(out_cb_id, out_cb_pagesize);
     auto cb_out = tt_metal::CreateCircularBuffer(program, all_cores, cb_out_config);
 
-    CircularBufferID cb_sharded_out = 0;
+    CBHandle cb_sharded_out = 0;
     if (output.memory_config().is_sharded()) {
         uint32_t sharded_out_cb_id = CB::c_out1;            // output rows in RM
 
diff --git a/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp b/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp
index a68bf29b182..5f75cbbad05 100644
--- a/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp
+++ b/tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp
@@ -60,9 +60,9 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out
     string compute_kernel_name = reduce_op_utils::dim_to_kernel_name(reduce_dim, reduce_op);
 
     uint32_t src0_cb_index = CB::c_in0;
-    CircularBufferID cb_src0;
+    CBHandle cb_src0;
     uint32_t src1_cb_index = CB::c_in1;
-    CircularBufferID cb_src1 = 0;
+    CBHandle cb_src1 = 0;
     if (in_sharded) {
         uint32_t num_shard_tiles = a.shard_spec().value().numel() / TILE_HW;
         uint32_t num_input_tiles = 2;
@@ -86,7 +86,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out
     auto cb_scaler = tt_metal::CreateCircularBuffer(program, all_cores, cb_scaler_config);
 
     uint32_t output_cb_index = CB::c_out0; // output operands start at index 16
-    CircularBufferID cb_output;
+    CBHandle cb_output;
     if (out_sharded) {
         uint32_t num_output_tiles = output.shard_spec().value().numel() / TILE_HW;
         tt_metal::CircularBufferConfig cb_output_config = tt_metal::CircularBufferConfig(num_output_tiles * dst_single_tile_size, {{output_cb_index, dst_cb_data_format}})
@@ -99,7 +99,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out
         cb_output = tt_metal::CreateCircularBuffer(program, all_cores, cb_output_config);
     }
     tt_metal::Buffer *src0_buffer = a.buffer();
-    tt_metal::KernelID reader_kernel_id;
+    tt_metal::KernelHandle reader_kernel_id;
     bfloat16 bfloat_scaler_value = bfloat16(scaler);
     uint32_t packed_scaler_value = pack_two_bfloat16_into_uint32({bfloat_scaler_value, bfloat_scaler_value});
     if (in_sharded) {
@@ -135,7 +135,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(const Tensor &a, Tensor& out
     }
 
     tt_metal::Buffer *dst_buffer = output.buffer();
-    tt_metal::KernelID writer_kernel_id;
+    tt_metal::KernelHandle writer_kernel_id;
 
     if (out_sharded) {
         vector<uint32_t> writer_ct_args = {
diff --git a/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp b/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp
index fb8b99d9f23..9b64a03bca9 100644
--- a/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp
+++ b/tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp
@@ -77,13 +77,13 @@ operation::ProgramWithCallbacks reduce_multi_core_w(const Tensor &a, Tensor& out
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/reduce/kernels/dataflow/reader_unary_reduce_interleaved_start_id.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp b/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp
index 3a65b6984e4..3db93bb6f95 100644
--- a/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/reduce/single_core/reduce_op_single_core.cpp
@@ -84,7 +84,7 @@ operation::ProgramWithCallbacks reduce_single_core(const Tensor &a, Tensor& outp
     if (reduce_dim == ReduceOpDim::H) {
         reader_defines["REDUCE_SCALER"] = "1";
     }
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         reduce_dim == ReduceOpDim::H ?
             "tt_eager/tt_dnn/kernels/dataflow/reader_unary_transpose_wh_interleaved.cpp" :
@@ -92,7 +92,7 @@ operation::ProgramWithCallbacks reduce_single_core(const Tensor &a, Tensor& outp
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp b/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp
index 2fb4ff882e8..0b643e1212d 100644
--- a/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp
+++ b/tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp
@@ -57,13 +57,13 @@ operation::ProgramWithCallbacks reshape_tile_single_core(const Tensor &a, Tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reshape_interleaved.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
@@ -200,13 +200,13 @@ operation::ProgramWithCallbacks reshape_rm_single_core(const Tensor &a, Tensor&
         writer_compile_time_args.push_back(0);
     }
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_reshape_stick_layout_interleaved.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_reshape_stick_layout_interleaved.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp
index f4dcabcf973..4fbf12a2709 100644
--- a/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp
@@ -177,13 +177,13 @@ operation::ProgramWithCallbacks rotary_embedding_multi_core(const Tensor &input,
         });
     }
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_rotary_embedding_interleaved_start_id.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines=kernel_defines});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_rotary_embedding_interleaved_start_id.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp b/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp
index 779d7ddad72..3616d69a4ba 100644
--- a/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/rotary_embedding/single_core/rotary_embedding_op_single_core.cpp
@@ -178,13 +178,13 @@ operation::ProgramWithCallbacks rotary_embedding_single_core(const Tensor &input
         });
     }
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_rotary_embedding_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines=kernel_defines});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_rotary_embedding_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp b/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp
index 8f826572857..97c39853d1a 100644
--- a/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp
@@ -78,13 +78,13 @@ operation::ProgramWithCallbacks rotate_half_single_core(const Tensor &input, Ten
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_rotate_half_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_rotate_half_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp
index 44c9006d5f6..9de93186cc9 100644
--- a/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp
@@ -68,7 +68,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(const Tensor &
 
     bool src_is_dram = src_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
 
-    tt_metal::KernelID unary_reader_kernel_id;
+    tt_metal::KernelHandle unary_reader_kernel_id;
     if (input.layout() == Layout::TILE) {
 
         std::vector<uint32_t> reader_compile_time_args = {
@@ -99,7 +99,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(const Tensor &
     }
 
     std::vector<uint32_t> writer_compile_time_args = {out_cb_index};
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp",
         all_cores,
@@ -262,7 +262,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(const Tensor &
         (std::uint32_t) src0_cb_index
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp",
         all_cores,
@@ -271,7 +271,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(const Tensor &
 
     bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
 
-    tt_metal::KernelID unary_writer_kernel_id;
+    tt_metal::KernelHandle unary_writer_kernel_id;
     if (input.layout() == Layout::TILE) {
         std::vector<uint32_t> writer_compile_time_args = {
             (std::uint32_t) src0_cb_index,
diff --git a/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp b/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp
index a104c124595..384149e4d56 100644
--- a/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp
+++ b/tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp
@@ -152,9 +152,9 @@ operation::ProgramWithCallbacks scale_mask_softmax_(const Tensor &input_tensor,
     auto cb_in2_id = CreateCircularBuffer( program, all_device_cores, c_in2_config );
     auto c_intermed0_config = CircularBufferConfig(im0_t * in0_tile_size, {{CB::c_intermed0, in0_cb_data_format}}).set_page_size(CB::c_intermed0, in0_tile_size);
     auto cb_intermed0_id = CreateCircularBuffer( program, all_device_cores, c_intermed0_config );
-    std::optional<CircularBufferID> cb_intermed3_id;
-    std::optional<CircularBufferID> cb_in3_id;
-    std::optional<CircularBufferID> cb_in4_id;
+    std::optional<CBHandle> cb_intermed3_id;
+    std::optional<CBHandle> cb_in3_id;
+    std::optional<CBHandle> cb_in4_id;
     if (mask.has_value()) {
         CircularBufferConfig c_intermed3_config = CircularBufferConfig(im3_t * in0_tile_size, {{CB::c_intermed3, in0_cb_data_format}}).set_page_size(CB::c_intermed3, in0_tile_size);
         cb_intermed3_id = CreateCircularBuffer( program, all_device_cores, c_intermed3_config );
diff --git a/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp b/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp
index 3e4d123709c..8e64ce8db93 100644
--- a/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp
+++ b/tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp
@@ -34,8 +34,8 @@ void setup_runtime(
     tt_metal::Buffer *in0_buffer,
     tt_metal::Buffer *out0_buffer,
     tt_metal::Buffer *out1_buffer,
-    tt_metal::KernelID reader_kernel_id,
-    tt_metal::KernelID writer_kernel_id) {
+    tt_metal::KernelHandle reader_kernel_id,
+    tt_metal::KernelHandle writer_kernel_id) {
     uint32_t start_core_x = 0;
     uint32_t start_core_y = 0;
 
diff --git a/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp
index 764fc2c93af..21eb0eea5b8 100644
--- a/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp
@@ -197,7 +197,7 @@ operation::ProgramWithCallbacks tilize_multi_core_interleaved(const Tensor &a, T
         (std::uint32_t) stick_size_is_power_of_two,
         (std::uint32_t) log2_stick_size,
     };
-    KernelID unary_reader_kernel_id = CreateKernel(
+    KernelHandle unary_reader_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_split_rows_interleaved.cpp",
         all_cores,
@@ -213,7 +213,7 @@ operation::ProgramWithCallbacks tilize_multi_core_interleaved(const Tensor &a, T
         (std::uint32_t) output_cb_index,
         (std::uint32_t) out_is_dram
     };
-    KernelID unary_writer_kernel_id = CreateKernel(
+    KernelHandle unary_writer_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         all_cores,
@@ -415,13 +415,13 @@ operation::ProgramWithCallbacks tilize_multi_core_sharded(const Tensor &input, T
         (std::uint32_t) output_cb_index
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/writer_unary_sharded.cpp",
         all_cores,
@@ -558,7 +558,7 @@ operation::ProgramWithCallbacks tilize_with_val_padding_multi_core(const Tensor
 
     /** reader
      */
-    KernelID unary_reader_kernel_id;
+    KernelHandle unary_reader_kernel_id;
     std::vector<uint32_t> reader_ct_args = {
             (std::uint32_t) src0_cb_index,
             (std::uint32_t) src1_cb_index,
@@ -573,7 +573,7 @@ operation::ProgramWithCallbacks tilize_with_val_padding_multi_core(const Tensor
 
     /** writer
      */
-    KernelID unary_writer_kernel_id;
+    KernelHandle unary_writer_kernel_id;
     bool out_is_dram = dst_buffer->buffer_type() == BufferType::DRAM ? 1 : 0;
     vector<uint32_t> writer_ct_args = {
         output_cb_index,
diff --git a/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp b/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp
index 761f0190d09..24e8c854901 100644
--- a/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp
@@ -111,14 +111,14 @@ operation::ProgramWithCallbacks tilize_single_core(const Tensor &a, Tensor& outp
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_stick_layout_split_rows_interleaved.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
     // Tilized writer
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
@@ -314,14 +314,14 @@ operation::ProgramWithCallbacks tilize_with_val_padding_single_core(const Tensor
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_pad_dims_split_rows.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
     // Tilized writer
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp
index fb40179f623..62116c806e5 100644
--- a/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_multi_core.cpp
@@ -136,13 +136,13 @@ operation::ProgramWithCallbacks transpose_cn_single_core(const Tensor &a, Tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_cn_interleaved_start_id.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp
index 0d0db7ae3b6..bebe91515e5 100644
--- a/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp
@@ -147,13 +147,13 @@ operation::ProgramWithCallbacks transpose_hc_multi_core(const Tensor &a, Tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp",
         total_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         total_cores,
diff --git a/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp b/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp
index 822f7c9d117..122cc4d25c9 100644
--- a/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/transpose/single_core/transpose_op_single_core.cpp
@@ -84,13 +84,13 @@ operation::ProgramWithCallbacks transpose_wh_single_core(const Tensor &a, Tensor
     };
 
     //TODO: move this kernel, currently being used in reduce, can't move to op library
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_transpose_wh_interleaved.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
@@ -235,13 +235,13 @@ operation::ProgramWithCallbacks transpose_hc_single_core(const Tensor &a, Tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_hc_interleaved_partitioned.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
@@ -350,13 +350,13 @@ operation::ProgramWithCallbacks transpose_cn_single_core(const Tensor &a, Tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_cn_interleaved.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp
index 5cd83065a3a..ab963afef14 100644
--- a/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp
@@ -144,13 +144,13 @@ operation::ProgramWithCallbacks transpose_wh_multi_core(const Tensor &a, Tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/transpose/kernels/dataflow/reader_unary_transpose_wh_interleaved_start_id.cpp",
         total_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         total_cores,
diff --git a/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp
index 5d09d590976..5a881cb91a8 100644
--- a/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp
@@ -171,13 +171,13 @@ operation::ProgramWithCallbacks unpad_rm_multi_core(const Tensor &a, Tensor& out
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_rm_interleaved_start_id.cpp",
         total_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args_vec});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp",
         total_cores,
@@ -393,13 +393,13 @@ operation::ProgramWithCallbacks unpad_tile_multi_core(const Tensor &a, Tensor& o
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_interleaved_start_id.cpp",
         total_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         total_cores,
diff --git a/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp b/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp
index 8969ca374d9..d7c9ed31e8a 100644
--- a/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/unpad/single_core/unpad_op_single_core.cpp
@@ -126,13 +126,13 @@ operation::ProgramWithCallbacks unpad_rm_single_core(const Tensor &a, Tensor& ou
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_rm_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args_vec});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/writer_unary_stick_layout_interleaved_start_id.cpp",
         core,
@@ -281,13 +281,13 @@ operation::ProgramWithCallbacks unpad_tile_single_core(const Tensor &a, Tensor&
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/unpad/kernels/dataflow/reader_unary_unpad_dims_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp
index 9a222160d1b..696c97691f8 100644
--- a/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp
@@ -233,7 +233,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out
 
     /** reader
      */
-    KernelID unary_reader_kernel_id;
+    KernelHandle unary_reader_kernel_id;
 
     if (src_sharded) {
         std::vector<uint32_t> reader_ct_args = {
@@ -263,7 +263,7 @@ operation::ProgramWithCallbacks untilize_multi_core(const Tensor& a, Tensor& out
 
     /** writer
      */
-    KernelID unary_writer_kernel_id;
+    KernelHandle unary_writer_kernel_id;
     if (out_sharded) {
         std::vector<uint32_t> writer_ct_args = {
             (std::uint32_t) output_cb_index
@@ -650,7 +650,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor
         .set_page_size(output_cb_index, output_single_tile_size);
     auto cb_output = tt_metal::CreateCircularBuffer(program, all_cores, output_cb_config);
 
-    CircularBufferID cb_sharded_output = 0;
+    CBHandle cb_sharded_output = 0;
     uint32_t sharded_output_cb_index = CB::c_out1;
     if (out_sharded) {
         tt_metal::CircularBufferConfig sharded_output_cb_config = tt_metal::CircularBufferConfig(num_output_rows_unpadded * block_row_size, {{sharded_output_cb_index, output_cb_data_format}})
@@ -664,7 +664,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor
 
     /** reader
      */
-    KernelID unary_reader_kernel_id;
+    KernelHandle unary_reader_kernel_id;
     std::vector<uint32_t> reader_ct_args = {
             (std::uint32_t) src0_cb_index
         };
@@ -677,7 +677,7 @@ operation::ProgramWithCallbacks untilize_with_unpadding_multi_core(const Tensor
 
     /** writer
      */
-    KernelID unary_writer_kernel_id;
+    KernelHandle unary_writer_kernel_id;
     if (out_sharded) {
         vector<uint32_t> writer_ct_args = {
             (uint32_t) output_cb_index,
diff --git a/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp b/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp
index b48e9902c56..382e4112b2a 100644
--- a/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp
@@ -104,14 +104,14 @@ operation::ProgramWithCallbacks untilize_single_core(const Tensor &a, Tensor& ou
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
     // Untilized writer
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_stick_layout_split_rows_interleaved.cpp",
         core,
@@ -283,14 +283,14 @@ operation::ProgramWithCallbacks untilize_with_unpadding_single_core(const Tensor
     };
 
     // Tilized reader
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
     // Untilized writer
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_unpad_dims_split_rows.cpp",
         core,
diff --git a/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp b/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp
index e64b932a880..bc58ef916b7 100644
--- a/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp
+++ b/tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp
@@ -261,7 +261,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i
         (std::uint32_t) src_cb_id
     };
 
-    KernelID reader_kernel_id = CreateKernel(
+    KernelHandle reader_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp",
         all_cores,
@@ -284,7 +284,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i
         (std::uint32_t) pc.pad_w,
         (std::uint32_t) pc.pad_h,
     };
-    KernelID writer_kernel_id = CreateKernel(
+    KernelHandle writer_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo_s2.cpp",
         all_cores,
@@ -300,7 +300,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s2(const Tensor& i
         (uint32_t) nblocks_per_core,    // per_core_block_cnt
         (uint32_t) ntiles_per_block,    // per_block_ntiles
     };
-    KernelID untilize_kernel_id = CreateKernel(
+    KernelHandle untilize_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/untilize/kernels/compute/untilize.cpp",
         all_cores,
@@ -899,7 +899,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a
         (std::uint32_t) src_cb_id
     };
 
-    KernelID reader_kernel_id = CreateKernel(
+    KernelHandle reader_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/sharded/kernels/dataflow/reader_unary_sharded.cpp",
         all_cores,
@@ -922,7 +922,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a
         (std::uint32_t) pad_w,
         (std::uint32_t) pad_h,
     };
-    KernelID writer_kernel_id = CreateKernel(
+    KernelHandle writer_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/untilize/kernels/dataflow/writer_unary_sharded_with_halo.cpp",
         all_cores,
@@ -938,7 +938,7 @@ operation::ProgramWithCallbacks untilize_with_halo_multi_core_s1(const Tensor& a
         (uint32_t) nblocks_per_core,    // per_core_block_cnt
         (uint32_t) ntiles_per_block,    // per_block_ntiles
     };
-    KernelID untilize_kernel_id = CreateKernel(
+    KernelHandle untilize_kernel_id = CreateKernel(
         program,
         "tt_eager/tt_dnn/op_library/untilize/kernels/compute/untilize.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp
index d7157d0c181..3ea86119fa9 100644
--- a/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp
+++ b/tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp
@@ -55,13 +55,13 @@ operation::ProgramWithCallbacks fill_cache_multi_core(const Tensor& cache_tensor
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         all_cores,
diff --git a/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp b/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp
index 8f86415a57b..27f4a076a37 100644
--- a/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp
+++ b/tt_eager/tt_dnn/op_library/update_cache/single_core/update_cache_op_single_core.cpp
@@ -100,13 +100,13 @@ operation::ProgramWithCallbacks update_cache_single_core(const Tensor& cache_ten
         (std::uint32_t) interm2_cb_index
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_update_cache_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_update_cache_interleaved_start_id.cpp",
         core,
@@ -230,13 +230,13 @@ operation::ProgramWithCallbacks fill_cache_single_core(const Tensor& cache_tenso
         (std::uint32_t) dst_is_dram
     };
 
-    tt_metal::KernelID unary_reader_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_reader_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/reader_unary_interleaved_start_id.cpp",
         core,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
 
-    tt_metal::KernelID unary_writer_kernel_id = tt_metal::CreateKernel(
+    tt_metal::KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
         program,
         "tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
         core,
diff --git a/tt_metal/detail/program.hpp b/tt_metal/detail/program.hpp
index f51e893b893..b8e3fb38a6b 100644
--- a/tt_metal/detail/program.hpp
+++ b/tt_metal/detail/program.hpp
@@ -13,16 +13,16 @@ using namespace tt::tt_metal;
 
 namespace tt::tt_metal::detail{
 
-    inline KernelID AddKernel ( Program & program, Kernel * kernel)
+    inline KernelHandle AddKernel ( Program & program, Kernel * kernel)
     {
         return program.add_kernel(kernel);
     }
 
-    inline Kernel *GetKernel(const Program &program, KernelID kernel_id) {
+    inline Kernel *GetKernel(const Program &program, KernelHandle kernel_id) {
         return program.kernels_.at(kernel_id);
     }
 
-    inline std::shared_ptr<CircularBuffer> GetCircularBuffer(const Program &program, CircularBufferID id) {
+    inline std::shared_ptr<CircularBuffer> GetCircularBuffer(const Program &program, CBHandle id) {
         return program.get_circular_buffer(id);
     }
 
diff --git a/tt_metal/detail/tt_metal.hpp b/tt_metal/detail/tt_metal.hpp
index 3fa9f3e65a1..46627a31547 100644
--- a/tt_metal/detail/tt_metal.hpp
+++ b/tt_metal/detail/tt_metal.hpp
@@ -355,7 +355,7 @@ namespace tt::tt_metal{
             bool riscv0_in_use = false; bool riscv1_in_use = false;
             bool noc0_in_use = false; bool noc1_in_use = false;
 
-            auto set_global_and_local_noc_usage = [&](KernelID kernel_id, bool &local_noc0_usage, bool &local_noc1_usage) {
+            auto set_global_and_local_noc_usage = [&](KernelHandle kernel_id, bool &local_noc0_usage, bool &local_noc1_usage) {
                 const auto kernel = detail::GetKernel(program, kernel_id);
                 auto kernel_config = std::get<DataMovementConfig>(kernel->config());
                 auto noc_value = magic_enum::enum_integer(kernel_config.noc);
diff --git a/tt_metal/host_api.hpp b/tt_metal/host_api.hpp
index 3816a25a8db..df2bf03673e 100644
--- a/tt_metal/host_api.hpp
+++ b/tt_metal/host_api.hpp
@@ -82,7 +82,7 @@ Program CreateProgram();
  * | core_spec    | Either a single logical core, a range of logical cores or a set of logical core ranges that indicate which cores kernel is placed on | const std::variant<CoreCoord, CoreRange, CoreRangeSet> & |             | Yes      |
  * | config       | Config for data movement or compute kernel                                                                                           | const std::variant<DataMovementConfig,ComputeConfig,EthernetConfig> &   |             | No       |
  */
-KernelID CreateKernel(Program &program, const std::string &file_name, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const std::variant<DataMovementConfig,ComputeConfig,experimental::EthernetConfig> & config);
+KernelHandle CreateKernel(Program &program, const std::string &file_name, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const std::variant<DataMovementConfig,ComputeConfig,experimental::EthernetConfig> & config);
 
 // ==================================================
 //                  HOST API: buffers
@@ -103,7 +103,7 @@ KernelID CreateKernel(Program &program, const std::string &file_name, const std:
  * | core_spec | Either a single logical core, a range of logical cores or a set of logical core ranges that indicate where the circular buffer will be configured | const std::variant<CoreCoord, CoreRange, CoreRangeSet> & |             | Yes      |
  * | config    | Config for circular buffer                                                                                                                        | const CircularBufferConfig &                             |             | Yes      |
  */
-CircularBufferID CreateCircularBuffer(Program &program, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const CircularBufferConfig &config);
+CBHandle CreateCircularBuffer(Program &program, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const CircularBufferConfig &config);
 
 /**
  * Gets a reference to the config owned by circular buffer at the given circular buffer ID.
@@ -113,9 +113,9 @@ CircularBufferID CreateCircularBuffer(Program &program, const std::variant<CoreC
  * | Argument  | Description                                                    | Type                         | Valid Range | Required |
  * |-----------|----------------------------------------------------------------|------------------------------|-------------|----------|
  * | program   | The program containing the circular buffer                     | Program &                    |             | Yes      |
- * | cb_handle | ID of the circular buffer, returned by `CreateCircularBuffers` | CircularBufferID (uintptr_t) |             | Yes      |
+ * | cb_handle | ID of the circular buffer, returned by `CreateCircularBuffers` | CBHandle (uintptr_t) |             | Yes      |
 */
-const CircularBufferConfig &GetCircularBufferConfig(Program &program, CircularBufferID cb_handle);
+const CircularBufferConfig &GetCircularBufferConfig(Program &program, CBHandle cb_handle);
 
 /**
  * Update the total size of the circular buffer at the given circular buffer handle. Updating a program-local circular buffer requires all circular buffers in the program to be reallocated.
@@ -125,10 +125,10 @@ const CircularBufferConfig &GetCircularBufferConfig(Program &program, CircularBu
  * | Argument   | Description                                                    | Type                         | Valid Range | Required |
  * |------------|----------------------------------------------------------------|------------------------------|-------------|----------|
  * | program    | The program containing the circular buffer                     | Program &                    |             | Yes      |
- * | cb_handle  | ID of the circular buffer, returned by `CreateCircularBuffers` | CircularBufferID (uintptr_t) |             | Yes      |
+ * | cb_handle  | ID of the circular buffer, returned by `CreateCircularBuffers` | CBHandle (uintptr_t) |             | Yes      |
  * | total_size | New size of the circular buffer in bytes                       | uint32_t                     |             | Yes      |
 */
-void UpdateCircularBufferTotalSize(Program &program, CircularBufferID cb_handle, uint32_t total_size);
+void UpdateCircularBufferTotalSize(Program &program, CBHandle cb_handle, uint32_t total_size);
 
 /**
  * Update the page size at specified `buffer_index` of the circular buffer at the given circular buffer handle.
@@ -138,11 +138,11 @@ void UpdateCircularBufferTotalSize(Program &program, CircularBufferID cb_handle,
  * | Argument     | Description                                                                                                                | Type                         | Valid Range                   | Required |
  * |--------------|----------------------------------------------------------------------------------------------------------------------------|------------------------------|-------------------------------|----------|
  * | program      | The program containing the circular buffer                                                                                 | Program &                    |                               | Yes      |
- * | cb_handle    | ID of the circular buffer, returned by `CreateCircularBuffers`                                                             | CircularBufferID (uintptr_t) |                               | Yes      |
+ * | cb_handle    | ID of the circular buffer, returned by `CreateCircularBuffers`                                                             | CBHandle (uintptr_t) |                               | Yes      |
  * | buffer_index | Circular buffer index to update page size. `cb_handle` must be a circular buffer that had previously programmed this index | uint8_t                      | 0 to NUM_CIRCULAR_BUFFERS - 1 | Yes      |
  * | page_size    | Updated page size in bytes                                                                                                 | uint32_t                     |                               | Yes      |
 */
-void UpdateCircularBufferPageSize(Program &program, CircularBufferID cb_handle, uint8_t buffer_index, uint32_t page_size);
+void UpdateCircularBufferPageSize(Program &program, CBHandle cb_handle, uint8_t buffer_index, uint32_t page_size);
 
 /**
  * Update the address of a dynamic circular buffer. Dynamic circular buffers share the same address space as L1 buffers.
@@ -152,10 +152,10 @@ void UpdateCircularBufferPageSize(Program &program, CircularBufferID cb_handle,
  * | Argument  | Description                                                                              | Type                         | Valid Range | Required |
  * |-----------|------------------------------------------------------------------------------------------|------------------------------|-------------|----------|
  * | program   | The program containing the circular buffer                                               | Program &                    |             | Yes      |
- * | cb_handle | ID of the circular buffer, returned by `CreateCircularBuffers`                           | CircularBufferID (uintptr_t) |             | Yes      |
+ * | cb_handle | ID of the circular buffer, returned by `CreateCircularBuffers`                           | CBHandle (uintptr_t) |             | Yes      |
  * | buffer    | Dynamically allocated L1 buffer that shares address space of circular buffer `cb_handle` | const Buffer &               | L1 buffer   | Yes      |
  */
-void UpdateDynamicCircularBufferAddress(Program &program, CircularBufferID cb_handle, const Buffer &buffer);
+void UpdateDynamicCircularBufferAddress(Program &program, CBHandle cb_handle, const Buffer &buffer);
 
 /**
  * Initializes semaphore on all cores within core range (inclusive). Each core can have up to four 32B semaphores.
@@ -208,11 +208,11 @@ void DeallocateBuffer(Buffer &buffer);
  * | Argument     | Description                                                            | Type                                                   | Valid Range                                                         | Required |
  * |--------------|------------------------------------------------------------------------|--------------------------------------------------------|---------------------------------------------------------------------|----------|
  * | program      | The program containing kernels, circular buffers, semaphores           | const Program &                                        |                                                                     | Yes      |
- * | kernel_id    | ID of the kernel that will receive the runtime args                    | KernelID (uint64_t)                                    |                                                                     | Yes      |
+ * | kernel_id    | ID of the kernel that will receive the runtime args                    | KernelHandle (uint64_t)                                    |                                                                     | Yes      |
  * | core_spec    | Location of Tensix core(s) where the runtime args will be written      | const std::variant<CoreCoord,CoreRange,CoreRangeSet> & | Any logical Tensix core coordinate(s) on which the kernel is placed | Yes      |
  * | runtime_args | The runtime args to be written                                         | const std::vector<uint32_t> &                          |                                                                     | Yes      |
  */
-void SetRuntimeArgs(const Program &program, KernelID kernel, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const std::vector<uint32_t> &runtime_args);
+void SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const std::vector<uint32_t> &runtime_args);
 
 /**
  * Set multiple runtime arguments of a kernel at once during runtime, each mapping to a specific core. The runtime args for each core may be unique.
@@ -222,11 +222,11 @@ void SetRuntimeArgs(const Program &program, KernelID kernel, const std::variant<
  * | Argument     | Description                                                            | Type                                                   | Valid Range                                                                | Required |
  * |--------------|------------------------------------------------------------------------|--------------------------------------------------------|----------------------------------------------------------------------------|----------|
  * | program      | The program containing kernels, circular buffers, semaphores           | const Program &                                        |                                                                            | Yes      |
- * | kernel_id    | ID of the kernel that will receive the runtime args                    | KernelID (uint64_t)                                    |                                                                            | Yes      |
+ * | kernel_id    | ID of the kernel that will receive the runtime args                    | KernelHandle (uint64_t)                                    |                                                                            | Yes      |
  * | core_spec    | Location of Tensix core(s) where the runtime args will be written      | const std::vector<CoreCoord> &                         | Any set of logical Tensix core coordinates on which the kernel is placed   | Yes      |
  * | runtime_args | The runtime args to be written                                         | const std::vector< vector<uint32_t> > &                | outer vector size must be equal to size of core_spec vector                | Yes      |
  */
-void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector<uint32_t> > &runtime_args);
+void SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector<uint32_t> > &runtime_args);
 
 /**
  * Get the runtime args for a kernel.
@@ -236,10 +236,10 @@ void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector<
  * | Argument     | Description                                                            | Type                          | Valid Range                        | Required |
  * |--------------|------------------------------------------------------------------------|-------------------------------|------------------------------------|----------|
  * | program      | The program containing kernels, circular buffers, semaphores           | const Program &               |                                    | Yes      |
- * | kernel_id    | ID of the kernel that will receive the runtime args                    | KernelID (uint64_t)                |                                    | Yes      |
+ * | kernel_id    | ID of the kernel that will receive the runtime args                    | KernelHandle (uint64_t)                |                                    | Yes      |
  * | logical_core | The location of the Tensix core where the runtime args will be written | const CoreCoord &             | Any logical Tensix core coordinate | Yes      |
  */
-std::vector<uint32_t>& GetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreCoord &logical_core);
+std::vector<uint32_t>& GetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreCoord &logical_core);
 
 /**
  * Reads a buffer from the device
diff --git a/tt_metal/impl/buffers/circular_buffer.hpp b/tt_metal/impl/buffers/circular_buffer.hpp
index bc6f53f1ba6..672c53a9612 100644
--- a/tt_metal/impl/buffers/circular_buffer.hpp
+++ b/tt_metal/impl/buffers/circular_buffer.hpp
@@ -17,7 +17,7 @@ class CircularBuffer {
    public:
     CircularBuffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config);
 
-    const CircularBufferID id() const { return id_; }
+    const CBHandle id() const { return id_; }
 
     const CoreRangeSet &core_ranges() const { return core_ranges_; }
 
diff --git a/tt_metal/impl/buffers/circular_buffer_types.hpp b/tt_metal/impl/buffers/circular_buffer_types.hpp
index ab78763d3df..8bd384c0231 100644
--- a/tt_metal/impl/buffers/circular_buffer_types.hpp
+++ b/tt_metal/impl/buffers/circular_buffer_types.hpp
@@ -17,7 +17,7 @@
 
 namespace tt::tt_metal {
 
-using CircularBufferID = uintptr_t;
+using CBHandle = uintptr_t;
 
 class CircularBufferConfig {
    public:
diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp
index 762c5af5b9f..da7d199d963 100644
--- a/tt_metal/impl/dispatch/command_queue.cpp
+++ b/tt_metal/impl/dispatch/command_queue.cpp
@@ -164,14 +164,14 @@ ProgramMap ConstructProgramMap(const Device* device, Program& program) {
         // which use multiple core ranges
         bool linked = dst_noc_multicast_info.size() == 1;
 
-        vector<KernelID> kernel_ids;
+        vector<KernelHandle> kernel_ids;
         if (kg.riscv0_id) kernel_ids.push_back(kg.riscv0_id.value());
         if (kg.riscv1_id) kernel_ids.push_back(kg.riscv1_id.value());
         if (kg.compute_id) kernel_ids.push_back(kg.compute_id.value());
 
         uint32_t src_copy = src;
         for (size_t i = 0; i < kernel_ids.size(); i++) {
-            KernelID kernel_id = kernel_ids[i];
+            KernelHandle kernel_id = kernel_ids[i];
             vector<RISCV> sub_kernels;
             const Kernel* kernel = detail::GetKernel(program, kernel_id);
             if (kernel->processor() == RISCV::COMPUTE) {
@@ -254,11 +254,11 @@ ProgramMap ConstructProgramMap(const Device* device, Program& program) {
     // Create a vector of all program binaries/cbs/semaphores
     uint32_t program_page_idx = 0;
     for (const KernelGroup &kg: program.get_kernel_groups()) {
-        vector<KernelID> kernel_ids;
+        vector<KernelHandle> kernel_ids;
         if (kg.riscv0_id) kernel_ids.push_back(kg.riscv0_id.value());
         if (kg.riscv1_id) kernel_ids.push_back(kg.riscv1_id.value());
         if (kg.compute_id) kernel_ids.push_back(kg.compute_id.value());
-        for (KernelID kernel_id: kernel_ids) {
+        for (KernelHandle kernel_id: kernel_ids) {
             const Kernel* kernel = detail::GetKernel(program, kernel_id);
 
             for (const ll_api::memory& kernel_bin : kernel->binaries(device->id())) {
diff --git a/tt_metal/impl/kernels/kernel_types.hpp b/tt_metal/impl/kernels/kernel_types.hpp
index 0eb789fc4b6..8b702eca750 100644
--- a/tt_metal/impl/kernels/kernel_types.hpp
+++ b/tt_metal/impl/kernels/kernel_types.hpp
@@ -11,7 +11,7 @@
 
 namespace tt::tt_metal {
 
-using KernelID = std::uint16_t;
+using KernelHandle = std::uint16_t;
 
 enum class DataMovementProcessor {
     RISCV_0 = 0,  // BRISC
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index b0a43ff6bf2..fc054ee9e6e 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -95,16 +95,16 @@ std::atomic<uint64_t> Program::program_counter = 0;
 
 Program::Program(): id(program_counter++),worker_crs_({}), local_circular_buffer_allocation_needed_(false) {}
 
-KernelID Program::add_kernel(Kernel *kernel) {
+KernelHandle Program::add_kernel(Kernel *kernel) {
     this->invalidate_compile();
-    KernelID id = kernels_.size();
+    KernelHandle id = kernels_.size();
     kernels_.push_back(kernel);
     kernel_groups_.resize(0);
     core_to_kernel_group_index_table_.clear();
     return id;
 }
 
-Kernel *Program::get_kernel(KernelID kernel_id) const {
+Kernel *Program::get_kernel(KernelHandle kernel_id) const {
     //TT_ASSERT(kernel_id < this->kernels_.size(), "Expected Kernel with ID {} to be in Program {}", kernel_id, this->id);
     return this->kernels_.at(kernel_id);
 }
@@ -114,10 +114,10 @@ KernelGroup::KernelGroup() : core_ranges({}) {
 
 KernelGroup::KernelGroup(
     const Program &program,
-    std::optional<KernelID> brisc_id,
-    std::optional<KernelID> ncrisc_id,
-    std::optional<KernelID> trisc_id,
-    std::optional<KernelID> erisc_id,
+    std::optional<KernelHandle> brisc_id,
+    std::optional<KernelHandle> ncrisc_id,
+    std::optional<KernelHandle> trisc_id,
+    std::optional<KernelHandle> erisc_id,
     int last_cb_index,
     const CoreRangeSet &new_ranges) :
     core_ranges({}) {
@@ -187,26 +187,26 @@ KernelGroup * Program::kernels_on_core(const CoreCoord &core) {
 
 struct KernelGroupInt {
     bool valid;
-    std::optional<KernelID> trisc_id = std::nullopt;
-    std::optional<KernelID> brisc_id = std::nullopt;
-    std::optional<KernelID> ncrisc_id = std::nullopt;
-    std::optional<KernelID> erisc_id = std::nullopt;
+    std::optional<KernelHandle> trisc_id = std::nullopt;
+    std::optional<KernelHandle> brisc_id = std::nullopt;
+    std::optional<KernelHandle> ncrisc_id = std::nullopt;
+    std::optional<KernelHandle> erisc_id = std::nullopt;
 
     bool operator==(const KernelGroupInt& b) const;
     void update(Kernel* kernel, size_t kernel_idx) {
         RISCV riscv_processor = kernel->processor();
         switch (riscv_processor) {
         case RISCV::BRISC:
-            this->brisc_id = static_cast<KernelID>(kernel_idx);
+            this->brisc_id = static_cast<KernelHandle>(kernel_idx);
             break;
         case RISCV::NCRISC:
-            this->ncrisc_id = static_cast<KernelID>(kernel_idx);
+            this->ncrisc_id = static_cast<KernelHandle>(kernel_idx);
             break;
         case RISCV::COMPUTE:
-            this->trisc_id = static_cast<KernelID>(kernel_idx);
+            this->trisc_id = static_cast<KernelHandle>(kernel_idx);
             break;
         case RISCV::ERISC:
-            this->erisc_id = static_cast<KernelID>(kernel_idx);
+            this->erisc_id = static_cast<KernelHandle>(kernel_idx);
             break;
         default:
             TT_ASSERT(false, "Unsupported kernel processor!");
@@ -337,7 +337,7 @@ void Program::CircularBufferAllocator::mark_address(uint64_t address, uint64_t s
     }
 }
 
-CircularBufferID Program::add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config) {
+CBHandle Program::add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config) {
     this->invalidate_compile();
     std::shared_ptr<CircularBuffer> circular_buffer = std::make_shared<CircularBuffer>(core_range_set, config);
     // Globally allocated circular buffer do not invalidate allocation because their addresses are tracked by memory allocator
@@ -378,7 +378,7 @@ CircularBufferID Program::add_circular_buffer(const CoreRangeSet &core_range_set
     return circular_buffer->id();
 }
 
-std::shared_ptr<CircularBuffer> Program::get_circular_buffer(CircularBufferID cb_id) const {
+std::shared_ptr<CircularBuffer> Program::get_circular_buffer(CBHandle cb_id) const {
     if (this->circular_buffer_by_id_.find(cb_id) == this->circular_buffer_by_id_.end()) {
         TT_THROW("No circular buffer with id {} exists in Program {}", cb_id, this->id);
     }
diff --git a/tt_metal/impl/program/program.hpp b/tt_metal/impl/program/program.hpp
index d22ee2745fb..8fbe26330c5 100644
--- a/tt_metal/impl/program/program.hpp
+++ b/tt_metal/impl/program/program.hpp
@@ -24,26 +24,26 @@ namespace tt_metal {
 // Fwd declares
 namespace detail{
     void ValidateCircularBufferRegion(const Program &program, const Device *device);
-    KernelID AddKernel ( Program & program, Kernel * kernel);
-    Kernel *GetKernel(const Program &program, KernelID kernel_id);
-    std::shared_ptr<CircularBuffer> GetCircularBuffer(const Program &program, CircularBufferID id);
+    KernelHandle AddKernel ( Program & program, Kernel * kernel);
+    Kernel *GetKernel(const Program &program, KernelHandle kernel_id);
+    std::shared_ptr<CircularBuffer> GetCircularBuffer(const Program &program, CBHandle id);
 }
 
 struct KernelGroup {
     CoreRangeSet core_ranges;
-    std::optional<KernelID> compute_id = std::nullopt;
-    std::optional<KernelID> riscv0_id = std::nullopt;
-    std::optional<KernelID> riscv1_id = std::nullopt;
-    std::optional<KernelID> erisc_id = std::nullopt;
+    std::optional<KernelHandle> compute_id = std::nullopt;
+    std::optional<KernelHandle> riscv0_id = std::nullopt;
+    std::optional<KernelHandle> riscv1_id = std::nullopt;
+    std::optional<KernelHandle> erisc_id = std::nullopt;
     launch_msg_t launch_msg;
 
     KernelGroup();
     KernelGroup(
         const Program &program,
-        std::optional<KernelID> brisc_id,
-        std::optional<KernelID> ncrisc_id,
-        std::optional<KernelID> trisc_id,
-        std::optional<KernelID> erisc_id,
+        std::optional<KernelHandle> brisc_id,
+        std::optional<KernelHandle> ncrisc_id,
+        std::optional<KernelHandle> trisc_id,
+        std::optional<KernelHandle> erisc_id,
         int last_cb_index,
         const CoreRangeSet &new_ranges);
 };
@@ -133,7 +133,7 @@ class Program {
     CoreCoord grid_extent_;
 
     std::vector<std::shared_ptr<CircularBuffer>> circular_buffers_;
-    std::unordered_map<CircularBufferID,  std::shared_ptr<CircularBuffer>> circular_buffer_by_id_;
+    std::unordered_map<CBHandle,  std::shared_ptr<CircularBuffer>> circular_buffer_by_id_;
     // Tracks which circular buffer indices are being used
     std::unordered_map<CoreCoord, std::bitset<NUM_CIRCULAR_BUFFERS>> per_core_cb_indices_;
     // Used to generate circular buffer addresses. There is one CircularBufferAllocator per unique CoreRange
@@ -149,19 +149,19 @@ class Program {
     std::vector<KernelGroup> kernel_groups_;
     std::vector<uint8_t> core_to_kernel_group_index_table_;
 
-    friend CircularBufferID CreateCircularBuffer(Program &program, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const CircularBufferConfig &config);
-    friend std::shared_ptr<CircularBuffer> detail::GetCircularBuffer(const Program &program, CircularBufferID id);
+    friend CBHandle CreateCircularBuffer(Program &program, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const CircularBufferConfig &config);
+    friend std::shared_ptr<CircularBuffer> detail::GetCircularBuffer(const Program &program, CBHandle id);
     friend void detail::ValidateCircularBufferRegion(const Program &program, const Device *device);
 
-    friend KernelID detail::AddKernel(Program &program, Kernel *kernel);
-    friend Kernel *detail::GetKernel(const Program &program, KernelID kernel_id);
+    friend KernelHandle detail::AddKernel(Program &program, Kernel *kernel);
+    friend Kernel *detail::GetKernel(const Program &program, KernelHandle kernel_id);
 
     friend uint32_t CreateSemaphore(Program &program, const std::variant<CoreRange,CoreRangeSet> &core_spec, uint32_t initial_value);
-    KernelID add_kernel(Kernel *kernel);
-    Kernel *get_kernel(KernelID kernel_id) const;
+    KernelHandle add_kernel(Kernel *kernel);
+    Kernel *get_kernel(KernelHandle kernel_id) const;
 
-    CircularBufferID add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config);
-    std::shared_ptr<CircularBuffer> get_circular_buffer(CircularBufferID cb_id) const;
+    CBHandle add_circular_buffer(const CoreRangeSet &core_range_set, const CircularBufferConfig &config);
+    std::shared_ptr<CircularBuffer> get_circular_buffer(CBHandle cb_id) const;
 
     void add_semaphore(const CoreRangeSet & crs, uint32_t address, uint32_t init_value);
 
diff --git a/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp b/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp
index 74bf27d3e92..810e181e996 100644
--- a/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp
+++ b/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp
@@ -92,28 +92,28 @@ int main(int argc, char **argv) {
         constexpr uint32_t src0_cb_index = CB::c_in0;
         constexpr uint32_t num_input_tiles = 2;
         CircularBufferConfig cb_src0_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src0_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src0_cb_index, single_tile_size);
-        CircularBufferID cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);
+        CBHandle cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);
 
         constexpr uint32_t src1_cb_index = CB::c_in1;
         CircularBufferConfig cb_src1_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src1_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src1_cb_index, single_tile_size);
-        CircularBufferID cb_src1 = tt_metal::CreateCircularBuffer(program, core, cb_src1_config);
+        CBHandle cb_src1 = tt_metal::CreateCircularBuffer(program, core, cb_src1_config);
 
         constexpr uint32_t output_cb_index = CB::c_out0;
         constexpr uint32_t num_output_tiles = 2;
         CircularBufferConfig cb_output_config = CircularBufferConfig(num_output_tiles * single_tile_size, {{output_cb_index, tt::DataFormat::Float16_b}}).set_page_size(output_cb_index, single_tile_size);
-        CircularBufferID cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config);
+        CBHandle cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config);
 
         /*
          * Specify data movement kernels for reading/writing data to/from
          * DRAM.
          */
-        KernelID binary_reader_kernel_id = CreateKernel(
+        KernelHandle binary_reader_kernel_id = CreateKernel(
             program,
             "tt_metal/kernels/dataflow/reader_binary_diff_lengths.cpp",
             core,
             DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default});
 
-        KernelID unary_writer_kernel_id = CreateKernel(
+        KernelHandle unary_writer_kernel_id = CreateKernel(
             program,
             "tt_metal/kernels/dataflow/writer_unary.cpp",
             core,
@@ -132,7 +132,7 @@ int main(int argc, char **argv) {
          * Use the add_tiles operation available in the eltwise_binary
          * compute kernel.
          */
-        KernelID eltwise_binary_kernel_id = CreateKernel(
+        KernelHandle eltwise_binary_kernel_id = CreateKernel(
             program,
             "tt_metal/kernels/compute/eltwise_binary.cpp",
             core,
diff --git a/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp b/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp
index 562d5c26325..44ec4de8746 100644
--- a/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp
+++ b/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp
@@ -63,24 +63,24 @@ int main(int argc, char **argv) {
         constexpr uint32_t src0_cb_index = CB::c_in0;
         constexpr uint32_t num_input_tiles = 2;
         CircularBufferConfig cb_src0_config = CircularBufferConfig(num_input_tiles * single_tile_size, {{src0_cb_index, tt::DataFormat::Float16_b}}).set_page_size(src0_cb_index, single_tile_size);
-        CircularBufferID cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);
+        CBHandle cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);
 
         constexpr uint32_t output_cb_index = CB::c_out0;
         constexpr uint32_t num_output_tiles = 2;
         CircularBufferConfig cb_output_config = CircularBufferConfig(num_output_tiles * single_tile_size, {{output_cb_index, tt::DataFormat::Float16_b}}).set_page_size(output_cb_index, single_tile_size);
-        CircularBufferID cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config);
+        CBHandle cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config);
 
         /*
          * Specify data movement kernels for reading/writing data to/from
          * DRAM.
          */
-        KernelID unary_reader_kernel_id = CreateKernel(
+        KernelHandle unary_reader_kernel_id = CreateKernel(
             program,
             "tt_metal/kernels/dataflow/reader_unary.cpp",
             core,
             DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default});
 
-        KernelID unary_writer_kernel_id = CreateKernel(
+        KernelHandle unary_writer_kernel_id = CreateKernel(
             program,
             "tt_metal/kernels/dataflow/writer_unary.cpp",
             core,
@@ -105,7 +105,7 @@ int main(int argc, char **argv) {
             {"SFPU_OP_CHAIN_0", "exp_tile_init(); exp_tile(0);"}
         };
 
-        KernelID eltwise_sfpu_kernel_id = CreateKernel(
+        KernelHandle eltwise_sfpu_kernel_id = CreateKernel(
             program,
             "tt_metal/kernels/compute/eltwise_sfpu.cpp",
             core,
diff --git a/tt_metal/programming_examples/loopback/loopback.cpp b/tt_metal/programming_examples/loopback/loopback.cpp
index 225ff192e39..33de9f92008 100644
--- a/tt_metal/programming_examples/loopback/loopback.cpp
+++ b/tt_metal/programming_examples/loopback/loopback.cpp
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
 
         constexpr CoreCoord core = {0, 0};
 
-        KernelID dram_copy_kernel_id = CreateKernel(
+        KernelHandle dram_copy_kernel_id = CreateKernel(
             program,
             "tt_metal/programming_examples/loopback/kernels/loopback_dram_copy.cpp",
             core,
diff --git a/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp b/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp
index ba2cedd9a44..5341bf54bb0 100644
--- a/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp
+++ b/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp
@@ -354,8 +354,8 @@ void matmul_multicore_reuse_mcast(vector<uint32_t>& a, vector<uint32_t>& b, vect
     /*
     * Kernels - Runtime arguments
     */
-    std::vector<KernelID> reader_kernel_ids;
-    std::vector<KernelID> writer_kernel_ids;
+    std::vector<KernelHandle> reader_kernel_ids;
+    std::vector<KernelHandle> writer_kernel_ids;
     for(int core_idx_y = 0; core_idx_y < num_cores_r; core_idx_y++) {
         for(int core_idx_x = 0; core_idx_x < num_cores_c; core_idx_x++) {
             CoreCoord core = {(std::size_t) start_core_x + core_idx_x, (std::size_t) start_core_y + core_idx_y};
diff --git a/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp b/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp
index 2206088932d..3c2d5d70037 100644
--- a/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp
+++ b/tt_metal/programming_examples/profiler/test_custom_cycle_count/test_custom_cycle_count.cpp
@@ -25,18 +25,18 @@ bool RunCustomCycle(tt_metal::Device *device, int loop_count, string run_name =
         {"LOOP_SIZE", std::to_string(loop_size)}
     };
 
-    tt_metal::KernelID brisc_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel(
         program, "tt_metal/programming_examples/profiler/test_custom_cycle_count/kernels/custom_cycle_count.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .defines = kernel_defines});
 
-    tt_metal::KernelID ncrisc_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle ncrisc_kernel = tt_metal::CreateKernel(
         program, "tt_metal/programming_examples/profiler/test_custom_cycle_count/kernels/custom_cycle_count.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .defines = kernel_defines});
 
     vector<uint32_t> trisc_kernel_args = {};
-    tt_metal::KernelID trisc_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle trisc_kernel = tt_metal::CreateKernel(
         program, "tt_metal/programming_examples/profiler/test_custom_cycle_count/kernels/custom_cycle_count_compute.cpp",
         all_cores,
         tt_metal::ComputeConfig{.compile_args = trisc_kernel_args, .defines = kernel_defines}
diff --git a/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp b/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp
index 31e96736bc2..8a6b54589e1 100644
--- a/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp
+++ b/tt_metal/programming_examples/profiler/test_full_buffer/test_full_buffer.cpp
@@ -25,18 +25,18 @@ bool RunCustomCycle(tt_metal::Device *device, int loop_count, string run_name =
         {"LOOP_SIZE", std::to_string(loop_size)}
     };
 
-    tt_metal::KernelID brisc_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel(
         program, "tt_metal/programming_examples/profiler/test_full_buffer/kernels/full_buffer.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .defines = kernel_defines});
 
-    tt_metal::KernelID ncrisc_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle ncrisc_kernel = tt_metal::CreateKernel(
         program, "tt_metal/programming_examples/profiler/test_full_buffer/kernels/full_buffer.cpp",
         all_cores,
         tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .defines = kernel_defines});
 
     vector<uint32_t> trisc_kernel_args = {};
-    tt_metal::KernelID trisc_kernel = tt_metal::CreateKernel(
+    tt_metal::KernelHandle trisc_kernel = tt_metal::CreateKernel(
         program, "tt_metal/programming_examples/profiler/test_full_buffer/kernels/full_buffer_compute.cpp",
         all_cores,
         tt_metal::ComputeConfig{.compile_args = trisc_kernel_args, .defines = kernel_defines}
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index 9eab23ad9c3..a5b679b7817 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -69,13 +69,13 @@ std::optional<uint32_t> get_semaphore_address(const Program &program, const Core
 }
 
 
-inline void SetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreCoord &c, const std::vector<uint32_t> &runtime_args)
+inline void SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreCoord &c, const std::vector<uint32_t> &runtime_args)
 {
     detail::GetKernel(program, kernel_id)->set_runtime_args(c, runtime_args);
 }
 
 
-inline void SetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreRange &core_range, const std::vector<uint32_t> &runtime_args)
+inline void SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreRange &core_range, const std::vector<uint32_t> &runtime_args)
 {
     for (auto x = core_range.start.x; x <= core_range.end.x; x++) {
         for (auto y = core_range.start.y; y <= core_range.end.y; y++) {
@@ -351,8 +351,8 @@ Program CreateProgram(){
     return Program();
 }
 
-KernelID CreateKernel(Program &program, const std::string &file_name, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const std::variant<DataMovementConfig,ComputeConfig, experimental::EthernetConfig> &config) {
-    return std::visit( [&](auto&& cfg) -> KernelID
+KernelHandle CreateKernel(Program &program, const std::string &file_name, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const std::variant<DataMovementConfig,ComputeConfig, experimental::EthernetConfig> &config) {
+    return std::visit( [&](auto&& cfg) -> KernelHandle
                         {
                             CoreRangeSet core_ranges = detail::GetCoreRangeSet(core_spec);
                             Kernel * kernel;
@@ -373,16 +373,16 @@ KernelID CreateKernel(Program &program, const std::string &file_name, const std:
                     );
 }
 
-CircularBufferID CreateCircularBuffer(Program &program, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const CircularBufferConfig &config) {
+CBHandle CreateCircularBuffer(Program &program, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const CircularBufferConfig &config) {
     CoreRangeSet core_ranges = detail::GetCoreRangeSet(core_spec);
     return program.add_circular_buffer(core_ranges, config);
 }
 
-const CircularBufferConfig &GetCircularBufferConfig(Program &program, CircularBufferID cb_handle) {
+const CircularBufferConfig &GetCircularBufferConfig(Program &program, CBHandle cb_handle) {
     return detail::GetCircularBuffer(program, cb_handle)->config();
 }
 
-void UpdateCircularBufferTotalSize(Program &program, CircularBufferID cb_handle, uint32_t total_size) {
+void UpdateCircularBufferTotalSize(Program &program, CBHandle cb_handle, uint32_t total_size) {
     std::shared_ptr<CircularBuffer> circular_buffer = detail::GetCircularBuffer(program, cb_handle);
     if (not circular_buffer->globally_allocated()) {
         program.invalidate_circular_buffer_allocation();
@@ -390,11 +390,11 @@ void UpdateCircularBufferTotalSize(Program &program, CircularBufferID cb_handle,
     circular_buffer->config().set_total_size(total_size);
 }
 
-void UpdateCircularBufferPageSize(Program &program, CircularBufferID cb_handle, uint8_t buffer_index, uint32_t page_size) {
+void UpdateCircularBufferPageSize(Program &program, CBHandle cb_handle, uint8_t buffer_index, uint32_t page_size) {
     detail::GetCircularBuffer(program, cb_handle)->config().set_page_size(buffer_index, page_size);
 }
 
-void UpdateDynamicCircularBufferAddress(Program &program, CircularBufferID cb_handle, const Buffer &buffer) {
+void UpdateDynamicCircularBufferAddress(Program &program, CBHandle cb_handle, const Buffer &buffer) {
     if (buffer.buffer_type() != BufferType::L1) {
         TT_FATAL("Only L1 buffers can have an associated circular buffer!");
     }
@@ -442,7 +442,7 @@ Buffer CreateBuffer(Device *device, std::uint64_t size, std::uint64_t page_size,
 
 void DeallocateBuffer(Buffer &buffer) { buffer.deallocate(); }
 
-void SetRuntimeArgs(const Program &program, KernelID kernel_id, const std::variant<CoreCoord,CoreRange,CoreRangeSet> &core_spec, const std::vector<uint32_t> &runtime_args) {
+void SetRuntimeArgs(const Program &program, KernelHandle kernel_id, const std::variant<CoreCoord,CoreRange,CoreRangeSet> &core_spec, const std::vector<uint32_t> &runtime_args) {
     ZoneScoped;
     std::visit(
         [&](auto&& core_spec)
@@ -461,7 +461,7 @@ void SetRuntimeArgs(const Program &program, KernelID kernel_id, const std::varia
     );
 }
 
-void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector<uint32_t> > &runtime_args)
+void SetRuntimeArgs(const Program &program, KernelHandle kernel, const std::vector< CoreCoord > & core_spec, const std::vector< std::vector<uint32_t> > &runtime_args)
 {
     ZoneScoped;
     TT_FATAL( core_spec.size() == runtime_args.size(), "Mistmatch between number of cores {} and number of runtime args {} getting updated", core_spec.size(), runtime_args.size());
@@ -470,7 +470,7 @@ void SetRuntimeArgs(const Program &program, KernelID kernel, const std::vector<
         k->set_runtime_args(core_spec[i], runtime_args[i]);
 }
 
-std::vector<uint32_t> & GetRuntimeArgs(const Program &program, KernelID kernel_id, const CoreCoord &logical_core) {
+std::vector<uint32_t> & GetRuntimeArgs(const Program &program, KernelHandle kernel_id, const CoreCoord &logical_core) {
     return detail::GetKernel(program, kernel_id)->runtime_args(logical_core);
 }