Skip to content

Commit

Permalink
#4141: Add GetPreferredNOCForDRAMRead, GetPreferredNOCForDRAMWrite an…
Browse files Browse the repository at this point in the history
…d update all ops to use these apis
  • Loading branch information
tt-aho committed Dec 5, 2023
1 parent 1d9d0a8 commit 15154c3
Show file tree
Hide file tree
Showing 83 changed files with 301 additions and 236 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ operation::ProgramWithCallbacks bcast_multi_core_h(const Tensor &a, const Tensor
program,
reader_name,
all_device_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args});

KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp",
all_device_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args});

std::map<std::string, std::string> bcast_defines = bcast_op_utils::get_defines(bcast_dim, bcast_math);
auto bcast_kernel_id = tt_metal::CreateKernel(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso
program,
reader_name,
all_device_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args, .defines = reader_defines});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args, .defines = reader_defines});

KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
all_device_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args});

auto bcast_kernel_id = tt_metal::CreateKernel(
program,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ operation::ProgramWithCallbacks bcast_multi_core_w(const Tensor &a, const Tensor
program,
reader_name,
all_device_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args});

KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/dataflow/writer_unary_8bank_input_cols_batched.cpp",
all_device_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args});

std::map<std::string, std::string> bcast_defines = bcast_op_utils::get_defines(bcast_dim, bcast_math);
auto bcast_kernel_id = tt_metal::CreateKernel(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ operation::ProgramWithCallbacks bcast_single_core(const Tensor &a, const Tensor
program,
reader_name,
core,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args});

KernelHandle unary_writer_kernel_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
core,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args});

const char* compute_name = bcast_op_utils::get_compute_name(bcast_dim);
std::map<std::string, std::string> bcast_defines = bcast_op_utils::get_defines(bcast_dim, bcast_math);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ operation::ProgramWithCallbacks matmul_multi_core(const Tensor &a, const Tensor
program,
"tt_eager/tt_dnn/kernels/dataflow/reader_bmm_8bank_output_tiles_partitioned.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args});

auto writer_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/dataflow/writer_unary_interleaved_start_id.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = writer_compile_time_args});
tt_metal::DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args});

vector<uint32_t> compute_args_group_1 = {
1, // B
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,13 @@ tt_metal::operation::ProgramWithCallbacks create_program(
program,
"tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args});

auto unary_writer_kernel_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args});

// Create compute kernel
auto mm_kernel_id = tt_metal::CreateKernel(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ tt_metal::operation::ProgramWithCallbacks create_program(
program,
"tt_eager/tt_dnn/kernels/dataflow/reader_bmm_tile_layout.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = reader_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = reader_compile_time_args});

auto unary_writer_kernel_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/dataflow/writer_bmm_tile_layout.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = writer_compile_time_args});

// Create compute kernel
auto mm_kernel_id = tt_metal::CreateKernel(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,23 +251,22 @@ operation::ProgramWithCallbacks create_program_mcast_in0(
program,
in0_is_sharded ? "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_sender_receiver_padding_block_sharded.cpp" : "tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp",
mcast_sender,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = in0_sender_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = in0_sender_compile_time_args});

auto mm_kernel_in1_sender_writer_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines});


KernelHandle mm_kernel_in0_receiver_id = 0;
if (!in0_is_sharded) {
mm_kernel_in0_receiver_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_receiver.cpp",
/* in0_receiver_in1_sender, // If not using half-half noc setup */
mcast_receivers,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = in0_receiver_compile_time_args});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = in0_receiver_compile_time_args});
}
// Compute kernel compile time args

Expand Down Expand Up @@ -771,20 +770,20 @@ operation::ProgramWithCallbacks create_program_mcast_in1(
program,
"tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp",
all_cores,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = in0_sender_compile_time_args, .defines = mm_kernel_in0_sender_defines});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch()), .compile_args = in0_sender_compile_time_args, .defines = mm_kernel_in0_sender_defines});

auto mm_kernel_in1_sender_writer_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp",
mcast_sender,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = in1_sender_writer_compile_time_args, .defines = mm_kernel_in1_sender_writer_defines});


auto mm_kernel_in1_receiver_writer_id = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/op_library/bmm/kernels/dataflow/reader_bmm_tile_layout_in1_receiver_writer_padding.cpp",
mcast_receivers,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_1_default, .compile_args = in1_receiver_writer_compile_time_args, .defines = mm_kernel_in1_receiver_writer_defines});
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch()), .compile_args = in1_receiver_writer_compile_time_args, .defines = mm_kernel_in1_receiver_writer_defines});

// Compute kernel compile time args

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,11 @@ operation::ProgramWithCallbacks create_program_mcast_in0_in1(
uint32_t in0_end = num_cores_r - 1;
uint32_t in1_end = num_cores_c - 1;

tt_metal::NOC in0_noc = tt_metal::NOC::RISCV_0_default;
tt_metal::NOC in1_noc = tt_metal::NOC::RISCV_1_default;
tt_metal::NOC in0_split_noc = tt_metal::NOC::RISCV_1_default;
tt_metal::NOC in1_split_noc = tt_metal::NOC::RISCV_0_default;
// in1 is the reader of weights/output writer, and we choose to make it use the optimized reader noc
tt_metal::NOC in0_noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch());
tt_metal::NOC in1_noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch());
tt_metal::NOC in0_split_noc = tt_metal::GetPreferredNOCForDRAMRead(device->arch());
tt_metal::NOC in1_split_noc = tt_metal::GetPreferredNOCForDRAMWrite(device->arch());
if (transpose_mcast) {
std::swap(in0_sender, in1_sender);
std::swap(in0_sender_in1_receiver, in0_receiver_in1_sender);
Expand Down
Loading

0 comments on commit 15154c3

Please sign in to comment.