Skip to content

Commit

Permalink
torch_npu: add aclnn support and new ops.
Browse files Browse the repository at this point in the history
  • Loading branch information
momo609 committed Dec 7, 2023
1 parent 72ec8e6 commit 4d1e6fb
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 12 deletions.
2 changes: 1 addition & 1 deletion docs/en/understand_mmcv/ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ We implement common ops used in detection, segmentation, etc.
| PointsInBoxes ||| | | |
| PointsInPolygons | || | ||
| PSAMask |||| ||
| RotatedFeatureAlign |||| | |
| RotatedFeatureAlign |||| | |
| RoIPointPool3d | ||| | |
| RoIPool | ||| ||
| RoIAlignRotated |||| | |
Expand Down
2 changes: 1 addition & 1 deletion docs/zh_cn/understand_mmcv/ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ MMCV 提供了检测、分割等任务中常用的算子
| PointsInBoxes ||| | | |
| PointsInPolygons | || | | |
| PSAMask |||| ||
| RotatedFeatureAlign |||| | |
| RotatedFeatureAlign |||| | |
| RoIPointPool3d | ||| | |
| RoIPool | ||| ||
| RoIAlignRotated |||| | |
Expand Down
14 changes: 14 additions & 0 deletions mmcv/ops/csrc/pytorch/npu/common_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef MMCV_OPS_CSRC_COMMON__UTIL_HPP_
#define MMCV_OPS_CSRC_COMMON__UTIL_HPP_
// NOTE(review): this header uses c10::SmallVector / c10::IntArrayRef but does
// not include their declarations itself — it assumes the including .cpp has
// already pulled in the c10 headers (e.g. via pytorch_npu_helper.hpp).

// Default inline capacity for shape vectors; 8 covers tensors up to 8 dims
// without a heap allocation.
constexpr int SIZE = 8;

// Copies a tensor shape (IntArrayRef view) into an owning SmallVector.
// Marked `inline`: this header is included from several translation units,
// and a non-inline function definition in a header violates the ODR and
// fails at link time with duplicate-symbol errors.
inline c10::SmallVector<int64_t, SIZE> array_to_vector(c10::IntArrayRef shape) {
  c10::SmallVector<int64_t, SIZE> shape_small_vec;
  for (const int64_t dim : shape) {
    shape_small_vec.emplace_back(dim);
  }
  return shape_small_vec;
}

#endif  // MMCV_OPS_CSRC_COMMON__UTIL_HPP_
11 changes: 8 additions & 3 deletions mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

Expand Down Expand Up @@ -101,8 +100,14 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
at::IntArrayRef offset = at::IntArrayRef(offsets);
at::IntArrayRef size = at::IntArrayRef(sizes);
at::IntArrayRef size_array = at::IntArrayRef(sizes);
c10::SmallVector<int64_t, N> offsetVec = array_to_small_vector(offset);
c10::SmallVector<int64_t, N> sizeVec = array_to_small_vector(size_array);
c10::SmallVector<int64_t, 8> offsetVec;
for (uint64_t i = 0; i < offset.size(); i++) {
offsetVec.emplace_back(offset[i]);
}
c10::SmallVector<int64_t, 8> sizeVec;
for (uint64_t i = 0; i < size_array.size(); i++) {
sizeVec.emplace_back(size_array[i]);
}
OpCommand cmd2;
cmd2.Name("Slice")
.Input(op_output)
Expand Down
4 changes: 3 additions & 1 deletion mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
auto input_size = input.sizes();
int input_length = input_size.size();
c10::SmallVector<int64_t, SIZE> input_size_tmp;
input_size_tmp = array_to_small_vector(input_size);
for (uint64_t i = 0; i < input_size.size(); i++) {
input_size_tmp.emplace_back(input_size[i]);
}
if (input_length > 1) {
for (int i = 0; i < input_length; i++) {
if (i != 1) {
Expand Down
44 changes: 44 additions & 0 deletions mmcv/ops/csrc/pytorch/npu/gather_points_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,50 @@ void gather_points_forward_npu(int b, int c, int n, int npoints,
.Attr("batch_dims", batch_dims)
.Run();
}
// Backward pass of gather_points on Ascend NPU: scatter-adds each row of
// grad_out back into grad_points at the point positions recorded in idx.
// b: batch size, c: feature channels, n: number of input points,
// npoints: number of gathered points. grad_points is written in place.
void gather_points_backward_npu(int b, int c, int n, int npoints,
const Tensor grad_out, const Tensor idx,
Tensor grad_points) {
// The NPU index-add kernel needs int32 indices; cast if necessary.
at::Tensor indices = idx;
if (idx.scalar_type() != at::ScalarType::Int) {
indices = idx.to(at::kInt);
}
// Promote a scalar index to a 1-D tensor so the reshapes below are valid.
if (idx.dim() == 0) {
indices.unsqueeze_(0);
}
int64_t dim = 0;
// Snapshot idx's shape for the broadcast of the per-batch offsets below.
auto shape = idx.sizes();
c10::SmallVector<int64_t, 8> pad_size;
for (uint64_t i = 0; i < shape.size(); i++) {
pad_size.emplace_back(shape[i]);
}
// Flatten grad_points from (b, c, n) to a 2-D (b*n, c) view (after the
// 1<->2 transpose) so a single InplaceIndexAdd over axis 0 can accumulate
// gradients for every batch at once.
at::Tensor trans_grad_points = grad_points.transpose(1, 2).contiguous();
at::Tensor grad_points_view = trans_grad_points.view(
{trans_grad_points.sizes()[0] * trans_grad_points.sizes()[1],
trans_grad_points.sizes()[2]});
// Same flattening for the incoming gradient: (b, c, npoints) -> (b*npoints, c).
at::Tensor trans_grad_out = grad_out.transpose(1, 2).contiguous();
trans_grad_out = trans_grad_out.view(
{trans_grad_out.sizes()[0] * trans_grad_out.sizes()[1],
trans_grad_out.sizes()[2]});
// Convert per-batch indices into global row indices of the flattened view:
// row = batch_id * n + idx, computed by broadcasting (b, 1) offsets over
// idx's shape and adding.
auto index = at::arange(0, b);
index = index.to(grad_out.device());
index = at::mul(index, n);
index = index.view({b, 1});
index = at::broadcast_to(index, pad_size);
indices = at::add(index, indices);
indices = indices.view({-1});
// Accumulate: grad_points_view[indices[i], :] += trans_grad_out[i, :].
OpCommand cmd;
cmd.Name("InplaceIndexAdd")
.Input(grad_points_view)
.Input(indices)
.Input(trans_grad_out)
.Output(grad_points_view)
.Attr("axis", dim)
.Run();
// Undo the flattening/transpose and copy the result back into the caller's
// grad_points tensor (its original (b, c, n) layout).
at::Tensor grad_points_result =
grad_points_view.view(trans_grad_points.sizes());
grad_points_result = grad_points_result.transpose(1, 2);
grad_points.copy_(grad_points_result);
}

void gather_points_forward_impl(int b, int c, int n, int npoints,
const Tensor points, const Tensor idx,
Expand Down
13 changes: 10 additions & 3 deletions mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,16 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
int64_t aligned_height_64 = aligned_height;
int64_t aligned_width_64 = aligned_width;
int64_t sampling_ratio_64 = sampling_ratio;
int64_t roi_end_mode = 0;
c10::SmallVector<int64_t, SIZE> xdiff_shape =
array_to_small_vector(grad_input.sizes());
int64_t roi_end_mode = 2;
if (!aligned) {
LOG(WARNING) << "The [aligned] attr in roi_align_grad op is false";
roi_end_mode = 0;
}
auto shape = grad_input.sizes();
c10::SmallVector<int64_t, SIZE> xdiff_shape;
for (uint64_t i = 0; i < shape.size(); i++) {
xdiff_shape.emplace_back(shape[i]);
}
OpCommand cmd;
cmd.Name("ROIAlignGrad")
.Input(grad_output)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,9 @@ def get_mluops_version(file_path):
]
define_macros += [('MMCV_WITH_NPU', None)]
extension = NpuExtension
if parse_version(torch.__version__) <= parse_version('2.0.0'):
if parse_version(torch.__version__) < parse_version('2.1.0'):
define_macros += [('MMCV_WITH_XLA', None)]
if parse_version(torch.__version__) > parse_version('2.0.0'):
if parse_version(torch.__version__) >= parse_version('2.1.0'):
define_macros += [('MMCV_WITH_KPRIVATE', None)]
except Exception:
raise ImportError('can not find any torch_npu')
Expand Down
6 changes: 5 additions & 1 deletion tests/test_ops/test_rotated_feature_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import torch

from mmcv.ops import rotated_feature_align
from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE


@pytest.mark.skipif(
Expand All @@ -17,6 +17,10 @@
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support')),
pytest.param(
'npu',
marks=pytest.mark.skipif(
not IS_NPU_AVAILABLE, reason='requires NPU support')),
pytest.param(
'cpu',
marks=pytest.mark.skipif(
Expand Down

0 comments on commit 4d1e6fb

Please sign in to comment.