From 7a374fb3b83389bbb9a9b6770ea6258a89259f68 Mon Sep 17 00:00:00 2001
From: Borys Bradel
Date: Tue, 28 May 2024 20:07:45 +0000
Subject: [PATCH] #8112: Require bias for linear

---
 ttnn/cpp/pybind11/operations/matmul.hpp |  4 ++--
 ttnn/cpp/ttnn/operations/conv2d.cpp     | 13 +++++++++++--
 ttnn/cpp/ttnn/operations/matmul.cpp     | 15 ++++++---------
 ttnn/cpp/ttnn/operations/matmul.hpp     |  2 +-
 ttnn/ttnn/operations/matmul.py          |  4 ++--
 5 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/ttnn/cpp/pybind11/operations/matmul.hpp b/ttnn/cpp/pybind11/operations/matmul.hpp
index 1c2573cba29..6262fd0d095 100644
--- a/ttnn/cpp/pybind11/operations/matmul.hpp
+++ b/ttnn/cpp/pybind11/operations/matmul.hpp
@@ -44,7 +44,7 @@ void py_module(py::module& module) {
         "linear",
         [](const ttnn::Tensor& input_tensor_a,
            const ttnn::Tensor& input_tensor_b,
-           const std::optional<const ttnn::Tensor>& bias,
+           const ttnn::Tensor& bias,
            const ttnn::MemoryConfig& memory_config = ttnn::DRAM_MEMORY_CONFIG,
            const std::optional<const DataType> dtype = std::nullopt,
            const std::optional<const MatmulProgramConfig> program_config = std::nullopt,
@@ -65,7 +65,7 @@ void py_module(py::module& module) {
         py::arg("input_tensor_a"),
         py::arg("input_tensor_b"),
         py::kw_only(),
-        py::arg("bias") = std::nullopt,
+        py::arg("bias"),
         py::arg("memory_config") = DRAM_MEMORY_CONFIG,
         py::arg("dtype") = std::nullopt,
         py::arg("program_config") = std::nullopt,
diff --git a/ttnn/cpp/ttnn/operations/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv2d.cpp
index 23a0e8a65dd..c2c68cc5093 100644
--- a/ttnn/cpp/ttnn/operations/conv2d.cpp
+++ b/ttnn/cpp/ttnn/operations/conv2d.cpp
@@ -704,10 +704,19 @@ std::tuple<ttnn::Tensor, uint32_t, uint32_t, ttnn::Tensor, std::optional<ttnn::
-    Tensor matmul_output = ttnn::operations::matmul::linear(
+    Tensor matmul_output = bias_tensor_on_device.has_value() ?
+        ttnn::operations::matmul::linear(
+            matmul_input,
+            weight_tensor_on_device,
+            bias_tensor_on_device.value(),
+            matmul_program_config,
+            conv_out_memory_config,
+            conv_config.dtype,
+            std::optional<const std::string>{conv_config.activation},
+            compute_kernel_config) :
+        ttnn::operations::matmul::matmul(
             matmul_input,
             weight_tensor_on_device,
-            bias_tensor_on_device,
             matmul_program_config,
             conv_out_memory_config,
             conv_config.dtype,
             std::optional<const std::string>{conv_config.activation},
             compute_kernel_config);
 
diff --git a/ttnn/cpp/ttnn/operations/matmul.cpp b/ttnn/cpp/ttnn/operations/matmul.cpp
index 49ac7f9149c..650dfa31e27 100644
--- a/ttnn/cpp/ttnn/operations/matmul.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul.cpp
@@ -106,7 +106,7 @@ std::optional<UnaryWithParam> get_fused_activation(const std::optional<const st
 ttnn::Tensor linear(
     const ttnn::Tensor& input_tensor_a,
     const ttnn::Tensor& input_tensor_b,
-    const std::optional<const ttnn::Tensor>& bias,
+    const ttnn::Tensor& bias,
     const std::optional<const MatmulProgramConfig> program_config,
     const ttnn::MemoryConfig& memory_config,
     std::optional<const DataType> dtype,
@@ -129,16 +129,13 @@ ttnn::Tensor linear(
     const auto input_tensor_a_4d = ttnn::unsqueeze_to_4D(input_tensor_a);
     const auto input_tensor_b_4d = ttnn::unsqueeze_to_4D(input_tensor_b);
-    std::optional<ttnn::Tensor> bias_4d = std::nullopt;
 
     const bool has_user_grid = core_grid.has_value();
     const bool has_program_config = program_config.has_value();
 
     bool post_process_bias = false;
-    if (bias.has_value()) {
-        bias_4d = ttnn::unsqueeze_to_4D(bias.value());
-        if (!has_program_config && !has_user_grid) {
-            post_process_bias = true;
-        }
+    auto bias_4d = ttnn::unsqueeze_to_4D(bias);
+    if (!has_program_config && !has_user_grid) {
+        post_process_bias = true;
     }
 
     if (width_a != height_b) {
@@ -150,11 +147,11 @@ ttnn::Tensor linear(
     }
 
     auto output_tensor = tt::operations::primary::matmul(
-        input_tensor_a_4d, input_tensor_b_4d, post_process_bias ? std::nullopt : bias_4d, program_config, memory_config, dtype, compute_kernel_config, false /*untilize_out*/, user_core_coord, get_fused_activation(activation));
+        input_tensor_a_4d, input_tensor_b_4d, post_process_bias ? std::nullopt : std::make_optional(bias_4d), program_config, memory_config, dtype, compute_kernel_config, false /*untilize_out*/, user_core_coord, get_fused_activation(activation));
 
     if (post_process_bias) {
         output_tensor = tt::tt_metal::bcast(
-            output_tensor, bias_4d.value(), tt::tt_metal::BcastOpMath::ADD, tt::tt_metal::BcastOpDim::H, memory_config);
+            output_tensor, bias_4d, tt::tt_metal::BcastOpMath::ADD, tt::tt_metal::BcastOpDim::H, memory_config);
     }
 
     if (activation.has_value() && !has_user_grid) {
diff --git a/ttnn/cpp/ttnn/operations/matmul.hpp b/ttnn/cpp/ttnn/operations/matmul.hpp
index e8b0de90320..8d3ef6d84b5 100644
--- a/ttnn/cpp/ttnn/operations/matmul.hpp
+++ b/ttnn/cpp/ttnn/operations/matmul.hpp
@@ -45,7 +45,7 @@ std::optional<UnaryWithParam> get_fused_activation(const std::optional<const st
 ttnn::Tensor linear(
     const ttnn::Tensor& input_tensor_a,
     const ttnn::Tensor& input_tensor_b,
-    const std::optional<const ttnn::Tensor>& bias,
+    const ttnn::Tensor& bias,
     const std::optional<const MatmulProgramConfig> program_config = std::nullopt,
     const ttnn::MemoryConfig& memory_config = ttnn::DRAM_MEMORY_CONFIG,
     std::optional<const DataType> dtype = std::nullopt,
diff --git a/ttnn/ttnn/operations/matmul.py b/ttnn/ttnn/operations/matmul.py
index 81d09080ddd..8e4f2b0985d 100644
--- a/ttnn/ttnn/operations/matmul.py
+++ b/ttnn/ttnn/operations/matmul.py
@@ -171,7 +171,7 @@ def linear(
     input_tensor_a: ttnn.Tensor,
     input_tensor_b: ttnn.Tensor,
     *,
-    bias: Optional[ttnn.Tensor] = None,
+    bias: ttnn.Tensor,
     memory_config: ttnn.MemoryConfig = ttnn.DRAM_MEMORY_CONFIG,
     dtype: Optional[ttnn.DataType] = None,
     core_grid: Optional[ttnn.CoreGrid] = None,
@@ -190,7 +190,7 @@ def linear(
         * :attr:`input_tensor_b` (ttnn.Tensor): the second tensor to be multiplied
 
     Keyword Arguments:
-        * :attr:`bias` (Optional[ttnn.Tensor]): the bias tensor to be added. Defaults to None
+        * :attr:`bias` (ttnn.Tensor): the bias tensor to be added. Required.
        * :attr:`memory_config` (ttnn.MemoryConfig): the memory configuration of the output tensor. Defaults to ttnn.DRAM_MEMORY_CONFIG
         * :attr:`dtype` (Optional[ttnn.DataType]): the data type of the output tensor. Defaults to None
         * :attr:`core_grid` (Optional[ttnn.CoreGrid]): the grid on which to distribute the sharded tensor on (writes to the cores L1s). Defaults to None
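
Reviewer note: with this change, bias becomes a required keyword argument of
ttnn.linear (py::arg("bias") no longer carries a default, so omitting it raises
a TypeError from Python), and call sites that may have no bias should branch to
ttnn.matmul, as the conv2d.cpp hunk above does. A minimal sketch of the
call-site migration, assuming the standard ttnn host API (ttnn.open_device,
ttnn.from_torch) and illustrative shapes:

    import torch
    import ttnn

    device = ttnn.open_device(device_id=0)

    # Tiled bfloat16 device tensors; linear expects TILE_LAYOUT inputs.
    a = ttnn.from_torch(torch.randn(32, 64), dtype=ttnn.bfloat16,
                        layout=ttnn.TILE_LAYOUT, device=device)
    b = ttnn.from_torch(torch.randn(64, 128), dtype=ttnn.bfloat16,
                        layout=ttnn.TILE_LAYOUT, device=device)
    bias = ttnn.from_torch(torch.randn(1, 128), dtype=ttnn.bfloat16,
                           layout=ttnn.TILE_LAYOUT, device=device)

    out = ttnn.linear(a, b, bias=bias)  # bias is now required
    out_no_bias = ttnn.matmul(a, b)     # callers without a bias use matmul

    ttnn.close_device(device)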
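For context on the post_process_bias path in matmul.cpp: when neither
program_config nor core_grid is supplied, linear runs the plain matmul and then
broadcast-adds the unsqueezed bias along H (BcastOpMath::ADD, BcastOpDim::H),
which is numerically the same as fusing the bias into the matmul. A plain torch
sketch of that equivalence, with illustrative shapes:

    import torch

    a = torch.randn(32, 64)
    b = torch.randn(64, 128)
    bias = torch.randn(1, 128)

    fused = a @ b + bias            # bias applied as part of the matmul op
    post = torch.add(a @ b, bias)   # matmul first, then broadcast add over rows
    assert torch.allclose(fused, post)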