diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 14b6b339c11f3..f61604ffabee6 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -25,6 +25,7 @@ Do not modify directly.* |||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |Affine|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| +|AffineGrid|*in* theta:**T1**
*in* size:**T2**
*out* grid:**T1**|20+|**T1** = tensor(float)
**T2** = tensor(int64)| |And|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)
**T1** = tensor(bool)| |ArgMax|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int8), tensor(uint8)| |||[11, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int8), tensor(uint8)| diff --git a/onnxruntime/core/providers/cpu/tensor/affine_grid.cc b/onnxruntime/core/providers/cpu/tensor/affine_grid.cc index 3993ff38738c7..6c16aa9c5a654 100644 --- a/onnxruntime/core/providers/cpu/tensor/affine_grid.cc +++ b/onnxruntime/core/providers/cpu/tensor/affine_grid.cc @@ -13,95 +13,99 @@ namespace onnxruntime { -#define REGISTER_KERNEL_TYPED(T) \ - ONNX_CPU_OPERATOR_TYPED_KERNEL( \ - AffineGrid, \ - 20, \ - T, \ - KernelDefBuilder() \ - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ +#define REGISTER_KERNEL_TYPED(T) \ + ONNX_CPU_OPERATOR_TYPED_KERNEL( \ + AffineGrid, \ + 20, \ + T, \ + KernelDefBuilder() \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ AffineGrid); REGISTER_KERNEL_TYPED(float) void generate_base_grid_2d(int64_t H, int64_t W, bool align_corners, Eigen::Matrix& base_grid) { - Eigen::VectorXf row_vec = Eigen::VectorXf::LinSpaced(W, -1, 1); + Eigen::VectorXf row_vec = Eigen::VectorXf::LinSpaced(static_cast(W), -1, 1); if (!align_corners) { row_vec = row_vec * (W - 1) / W; } - Eigen::VectorXf col_vec = Eigen::VectorXf::LinSpaced(H, -1, 1); + Eigen::VectorXf col_vec = Eigen::VectorXf::LinSpaced(static_cast(H), -1, 1); if (!align_corners) { col_vec = col_vec * (H - 1) / H; } base_grid.resize(H * W, 2); - for (int j = 0; j < H; j++) { - for (int i = 0; i < W; i++) { + for (Eigen::Index j = 0; j < H; j++) { + for (Eigen::Index i = 0; i < W; i++) { base_grid.row(j * W + i) << row_vec(i), col_vec(j); } } } void generate_base_grid_3d(int64_t D, int64_t H, int64_t W, bool align_corners, Eigen::Matrix& base_grid) { - Eigen::VectorXf row_vec = Eigen::VectorXf::LinSpaced(W, -1, 1); + Eigen::VectorXf row_vec = Eigen::VectorXf::LinSpaced(static_cast(W), -1, 1); if (!align_corners) { row_vec = row_vec * (W - 1) / W; } - Eigen::VectorXf col_vec = Eigen::VectorXf::LinSpaced(H, -1, 1); + Eigen::VectorXf col_vec = Eigen::VectorXf::LinSpaced(static_cast(H), -1, 1); if (!align_corners) { col_vec = col_vec * (H - 1) / H; } - - Eigen::VectorXf slice_vec = Eigen::VectorXf::LinSpaced(D, -1, 1); + Eigen::VectorXf slice_vec = Eigen::VectorXf::LinSpaced(static_cast(D), -1, 1); if (!align_corners) { slice_vec = slice_vec * (D - 1) / D; } base_grid.resize(D * H * W, 3); - for (int k = 0; k < D; k++) { - for (int j = 0; j < H; j++) { - for (int i = 0; i < W; i++) { + for (Eigen::Index k = 0; k < D; k++) { + for (Eigen::Index j = 0; j < H; j++) { + for (Eigen::Index i = 0; i < W; i++) { base_grid.row(k * H * W + j * W + i) << row_vec(i), col_vec(j), slice_vec(k); } } } } -void affine_grid_generator_2d(const Tensor* theta, const Eigen::Matrix& base_grid_transposed, int64_t batch_num, int64_t H, int64_t W, Tensor* grid) { - const Eigen::StorageOptions option = Eigen::RowMajor; - auto theta_batch_offset = batch_num * 2 * 3; - const float* theta_data = theta->Data() + theta_batch_offset; - const Eigen::Matrix theta_R{{theta_data[0], theta_data[1]}, {theta_data[3], theta_data[4]}}; - const Eigen::Array theta_T(theta_data[2], theta_data[5]); - - auto grid_batch_offset = batch_num * H * W * 2; - float* grid_data = grid->MutableData() + grid_batch_offset; - Eigen::Map> grid_matrix(grid_data, narrow(H * W), 2); - grid_matrix = ((theta_R * base_grid_transposed).array().colwise() + theta_T).matrix().transpose(); -} +template +struct AffineGridGenerator2D { + void operator()(const Tensor* theta, const Eigen::Matrix& base_grid_transposed, int64_t batch_num, int64_t H, int64_t W, Tensor* grid) const { + const Eigen::StorageOptions option = Eigen::RowMajor; + auto theta_batch_offset = batch_num * 2 * 3; + const T* theta_data = theta->Data() + theta_batch_offset; + const Eigen::Matrix theta_R{{theta_data[0], theta_data[1]}, {theta_data[3], theta_data[4]}}; + const Eigen::Array theta_T(theta_data[2], theta_data[5]); + + auto grid_batch_offset = batch_num * H * W * 2; + T* grid_data = grid->MutableData() + grid_batch_offset; + Eigen::Map> grid_matrix(grid_data, narrow(H * W), 2); + grid_matrix = ((theta_R * base_grid_transposed).array().colwise() + theta_T).matrix().transpose(); + } +}; -void affine_grid_generator_3d(const Tensor* theta, const Eigen::Matrix& base_grid_transposed, int64_t batch_num, int64_t D, int64_t H, int64_t W, Tensor* grid) { - const Eigen::StorageOptions option = Eigen::RowMajor; - auto theta_batch_offset = batch_num * 3 * 4; - const float* theta_data = theta->Data() + theta_batch_offset; - const Eigen::Matrix theta_R{ - {theta_data[0], theta_data[1], theta_data[2]}, - {theta_data[4], theta_data[5], theta_data[6]}, - {theta_data[8], theta_data[9], theta_data[10]} - }; - const Eigen::Array theta_T(theta_data[3], theta_data[7], theta_data[11]); - - auto grid_batch_offset = batch_num * D * H * W * 3; - float* grid_data = grid->MutableData() + grid_batch_offset; - Eigen::Map> grid_matrix(grid_data, narrow(D * H * W), 3); - grid_matrix = ((theta_R * base_grid_transposed).array().colwise() + theta_T).matrix().transpose(); -} +template +struct AffineGridGenerator3D { + void operator()(const Tensor* theta, const Eigen::Matrix& base_grid_transposed, int64_t batch_num, int64_t D, int64_t H, int64_t W, Tensor* grid) { + const Eigen::StorageOptions option = Eigen::RowMajor; + auto theta_batch_offset = batch_num * 3 * 4; + const float* theta_data = theta->Data() + theta_batch_offset; + const Eigen::Matrix theta_R{ + {theta_data[0], theta_data[1], theta_data[2]}, + {theta_data[4], theta_data[5], theta_data[6]}, + {theta_data[8], theta_data[9], theta_data[10]}}; + const Eigen::Array theta_T(theta_data[3], theta_data[7], theta_data[11]); + + auto grid_batch_offset = batch_num * D * H * W * 3; + float* grid_data = grid->MutableData() + grid_batch_offset; + Eigen::Map> grid_matrix(grid_data, narrow(D * H * W), 3); + grid_matrix = ((theta_R * base_grid_transposed).array().colwise() + theta_T).matrix().transpose(); + } +}; template Status AffineGrid::Compute(OpKernelContext* context) const { const Tensor* theta = context->Input(0); - //const auto elem_type = theta.GetElementType(); + const auto elem_type = theta->GetElementType(); const TensorShape& theta_shape = theta->Shape(); if (theta_shape.NumDimensions() != 3) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "AffineGrid : Input theta tensor dimension is not 3"); @@ -113,17 +117,20 @@ Status AffineGrid::Compute(OpKernelContext* context) const { if (size_shape.GetDims()[0] == 4 /*&& get_check_2d_grid_sample_consistency(theta_shape, size_shape, N, C, H, W)*/) { int64_t N = size_data[0], H = size_data[2], W = size_data[3]; - + TensorShape grid_shape{N, H, W, 2}; auto grid = context->Output(0, grid_shape); - + Eigen::Matrix base_grid; generate_base_grid_2d(H, W, align_corners_, base_grid); Eigen::Matrix base_grid_transposed = base_grid.transpose(); - for (int64_t batch_num = 0; batch_num < N; batch_num++) { - affine_grid_generator_2d(theta, base_grid_transposed, batch_num, H, W, grid); - } + std::function fn = [elem_type, theta, base_grid_transposed, H, W, grid](ptrdiff_t batch_num) { + utils::MLTypeCallDispatcher t_disp(elem_type); + t_disp.Invoke(theta, base_grid_transposed, batch_num, H, W, grid); + }; + + concurrency::ThreadPool::TryBatchParallelFor(context->GetOperatorThreadPool(), narrow(N), std::move(fn), 0); } else if (size_shape.GetDims()[0] == 5 /*&& get_check_2d_grid_sample_consistency(theta_shape, size_shape, N, C, H, W)*/) { int64_t N = size_data[0], D = size_data[2], H = size_data[3], W = size_data[4]; @@ -134,9 +141,12 @@ Status AffineGrid::Compute(OpKernelContext* context) const { generate_base_grid_3d(D, H, W, align_corners_, base_grid); Eigen::Matrix base_grid_transposed = base_grid.transpose(); - for (int64_t batch_num = 0; batch_num < N; batch_num++) { - affine_grid_generator_3d(theta, base_grid_transposed, batch_num, D, H, W, grid); - } + std::function fn = [elem_type, theta, base_grid_transposed, D, H, W, grid](ptrdiff_t batch_num) { + utils::MLTypeCallDispatcher t_disp(elem_type); + t_disp.Invoke(theta, base_grid_transposed, batch_num, D, H, W, grid); + }; + + concurrency::ThreadPool::TryBatchParallelFor(context->GetOperatorThreadPool(), narrow(N), std::move(fn), 0); } else { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "AffineGrid : Invalidate size - length of size shall be 4 or 5."); } diff --git a/onnxruntime/core/providers/cpu/tensor/affine_grid.h b/onnxruntime/core/providers/cpu/tensor/affine_grid.h index d25ba6e067f90..d79d9f320ad45 100644 --- a/onnxruntime/core/providers/cpu/tensor/affine_grid.h +++ b/onnxruntime/core/providers/cpu/tensor/affine_grid.h @@ -10,18 +10,18 @@ namespace onnxruntime { template class AffineGrid final : public OpKernel { -public: - AffineGrid(const OpKernelInfo& info) : OpKernel(info) { - int64_t align_corners = info.GetAttrOrDefault("align_corners", 0); - align_corners_ = (align_corners != 0); - } + public: + AffineGrid(const OpKernelInfo& info) : OpKernel(info) { + int64_t align_corners = info.GetAttrOrDefault("align_corners", 0); + align_corners_ = (align_corners != 0); + } - Status Compute(OpKernelContext* context) const override; + Status Compute(OpKernelContext* context) const override; -private: - bool align_corners_; - int64_t dtype_; - int64_t k_; + private: + bool align_corners_; + int64_t dtype_; + int64_t k_; }; } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py index 26c52a497abf2..22bad6f1be534 100644 --- a/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py +++ b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py @@ -21,7 +21,7 @@ for angle, translation, scale in zip(angles, translations, scales): for size in sizes: theta = np.array([], dtype=np.float32) - for _n in range(size[0]): + for _ in range(size[0]): angle_radian = (angle / 180.0) * np.pi theta = np.append( theta, @@ -68,28 +68,27 @@ for angle, translation, scale in zip(angles, translations, scales): for size in sizes: theta = np.array([], dtype=np.float32) - for _n in range(size[0]): + for _ in range(size[0]): angle_radian_x = (angle[0] / 180.0) * np.pi angle_radian_y = (angle[1] / 180.0) * np.pi - rotMatrix_x = np.array( + rot_matrix_x = np.array( [ [1, 0, 0], [0, np.cos(angle_radian_x), -np.sin(angle_radian_x)], [0, np.sin(angle_radian_x), np.cos(angle_radian_x)], ] ) - rotMatrix_y = np.array( + rot_matrix_y = np.array( [ [np.cos(angle_radian_y), 0, np.sin(angle_radian_y)], [0, 1, 0], [-np.sin(angle_radian_y), 0, np.cos(angle_radian_y)], ] ) - rotMatrix = np.matmul(rotMatrix_x, rotMatrix_y) - rotMatrix = rotMatrix * scale.reshape(3, 1) - translation = np.reshape(translation, (3, 1)) - rotMatrix = np.append(rotMatrix, translation, axis=1) - theta = np.append(theta, rotMatrix.flatten()) + rot_matrix = np.matmul(rot_matrix_x, rot_matrix_y) + rot_matrix = rot_matrix * scale.reshape(3, 1) + rot_matrix = np.append(rot_matrix, np.reshape(translation, (3, 1)), axis=1) + theta = np.append(theta, rot_matrix.flatten()) theta = theta.reshape(size[0], 3, 4) theta = torch.Tensor(theta) grid = affine_grid(theta, size, align_corners=align_corners)