Skip to content

Commit b5366a5

Browse files
committed
add roi_align_rotated npu adapter and promote roi_pool adapter.
1 parent 5040148 commit b5366a5

File tree

5 files changed

+104
-9
lines changed

5 files changed

+104
-9
lines changed

docs/zh_cn/understand_mmcv/ops.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ MMCV 提供了检测、分割等任务中常用的算子
2626
| FurthestPointSampleWithDist | || | | |
2727
| FusedBiasLeakyrelu | || | ||
2828
| GatherPoints | || | ||
29-
| GroupPoints | || | | |
29+
| GroupPoints | || | | |
3030
| Iou3d | ||| | |
3131
| KNN | || | | |
3232
| MaskedConv | ||| ||
@@ -44,7 +44,7 @@ MMCV 提供了检测、分割等任务中常用的算子
4444
| RotatedFeatureAlign |||| | |
4545
| RoIPointPool3d | ||| | |
4646
| RoIPool | ||| ||
47-
| RoIAlignRotated |||| | |
47+
| RoIAlignRotated |||| | |
4848
| RiRoIAlignRotated | || | | |
4949
| RoIAlign |||| ||
5050
| RoIAwarePool3d | ||| | |

mmcv/ops/csrc/common/pytorch_npu_helper.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#ifndef PYTORCH_NPU_HELPER_HPP_
1919
#define PYTORCH_NPU_HELPER_HPP_
2020

21-
#include <torch_npu/csrc/aten/CustomFunctions.h>
2221
#include <torch_npu/csrc/framework/utils/CalcuOpUtil.h>
2322
#include <torch_npu/csrc/framework/utils/OpAdapter.h>
2423

@@ -27,6 +26,18 @@
2726

2827
#define NPU_NAME_SPACE at_npu::native
2928

29+
const int SIZE = 8;
30+
c10::SmallVector<int64_t, SIZE> array_to_small_vector(c10::IntArrayRef shape)
31+
{
32+
c10::SmallVector<int64_t, SIZE> shape_small_vec;
33+
for (int i = 0; i < shape.size(); i++)
34+
{
35+
shape_small_vec.emplace_back(shape[i]);
36+
}
37+
38+
return shape_small_vec;
39+
}
40+
3041
#ifdef MMCV_WITH_XLA
3142
#define REGISTER_NPU_IMPL(key, value) REGISTER_DEVICE_IMPL(key, XLA, value)
3243
#else

mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,20 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
9999
c10::SmallVector<int64_t, 2> offsets = {0, 0};
100100
c10::SmallVector<int64_t, 2> sizes = {n_batch, 1};
101101
at::IntArrayRef offset = at::IntArrayRef(offsets);
102-
at::IntArrayRef size = at::IntArrayRef(sizes);
103-
at_npu::native::custom_ops::npu_slice_out(op_output, offset, size, output);
102+
at::IntArrayRef size_array = at::IntArrayRef(sizes);
103+
c10::SmallVector<int64_t, N> output_size;
104+
for (uint64_t i = 0; i < size_array.size(); i++) {
105+
output_size.emplace_back(size_array[i]);
106+
}
107+
at::Tensor result = at::empty(output_size, op_output.options());
108+
c10::SmallVector<int64_t, N> offsetVec = array_to_small_vector(offset);
109+
c10::SmallVector<int64_t, N> sizeVec = array_to_small_vector(size_array);
110+
cmd.Name("Slice")
111+
.Input(op_output)
112+
.Input(offsetVec)
113+
.Input(sizeVec)
114+
.Output(output)
115+
.Run();
104116
}
105117

106118
void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#include "pytorch_npu_helper.hpp"
2+
3+
using namespace NPU_NAME_SPACE;
4+
using namespace std;
5+
6+
/**
 * NPU adapter for the forward pass of rotated RoI Align.
 *
 * Launches the "RoiAlignRotated" operator with `input` and `rois` as operator
 * inputs and `output` as the operator output. The remaining arguments are
 * forwarded as operator attributes; the integer ones are widened to int64_t
 * before being handed to the command.
 *
 * @param input           First operator input.
 * @param rois            Second operator input.
 * @param output          Tensor the operator writes its result into.
 * @param aligned_height  Forwarded as attribute "pooled_h".
 * @param aligned_width   Forwarded as attribute "pooled_w".
 * @param spatial_scale   Forwarded as attribute "spatial_scale".
 * @param sampling_ratio  Forwarded as attribute "sampling_ratio".
 * @param aligned         Forwarded as attribute "aligned".
 * @param clockwise       Forwarded as attribute "clockwise".
 */
void roi_align_rotated_forward_npu(Tensor input, Tensor rois, Tensor output,
                                   int aligned_height, int aligned_width,
                                   float spatial_scale, int sampling_ratio,
                                   bool aligned, bool clockwise) {
  // Integer attributes are passed to the operator as 64-bit values.
  auto pooled_h = static_cast<int64_t>(aligned_height);
  auto pooled_w = static_cast<int64_t>(aligned_width);
  auto samples = static_cast<int64_t>(sampling_ratio);

  OpCommand roi_align_cmd;
  roi_align_cmd.Name("RoiAlignRotated");

  // Tensors: two inputs, one output.
  roi_align_cmd.Input(input);
  roi_align_cmd.Input(rois);
  roi_align_cmd.Output(output);

  // Attributes, in the same order as the original call chain.
  roi_align_cmd.Attr("pooled_h", pooled_h);
  roi_align_cmd.Attr("pooled_w", pooled_w);
  roi_align_cmd.Attr("spatial_scale", spatial_scale);
  roi_align_cmd.Attr("sampling_ratio", samples);
  roi_align_cmd.Attr("aligned", aligned);
  roi_align_cmd.Attr("clockwise", clockwise);

  roi_align_cmd.Run();
}
26+
27+
/**
 * NPU adapter for the backward pass of rotated RoI Align.
 *
 * Launches the "RoiAlignRotatedGrad" operator with `top_grad` and `rois` as
 * operator inputs and `bottom_grad` as the operator output. The sizes of
 * `bottom_grad` are forwarded as the "y_grad_shape" attribute; the remaining
 * arguments are forwarded as attributes, with the integer ones widened to
 * int64_t.
 *
 * @param top_grad        First operator input.
 * @param rois            Second operator input.
 * @param bottom_grad     Tensor the operator writes its result into; its
 *                        sizes are also passed as "y_grad_shape".
 * @param aligned_height  Forwarded as attribute "pooled_h".
 * @param aligned_width   Forwarded as attribute "pooled_w".
 * @param spatial_scale   Forwarded as attribute "spatial_scale".
 * @param sampling_ratio  Forwarded as attribute "sampling_ratio".
 * @param aligned         Forwarded as attribute "aligned".
 * @param clockwise       Forwarded as attribute "clockwise".
 */
void roi_align_rotated_backward_npu(Tensor top_grad, Tensor rois,
                                    Tensor bottom_grad, int aligned_height,
                                    int aligned_width, float spatial_scale,
                                    int sampling_ratio, bool aligned,
                                    bool clockwise) {
  int64_t aligned_height_64 = aligned_height;
  int64_t aligned_width_64 = aligned_width;
  int64_t sampling_ratio_64 = sampling_ratio;
  c10::SmallVector<int64_t, SIZE> y_grad_shape =
      array_to_small_vector(bottom_grad.sizes());
  OpCommand cmd;
  // Height goes to "pooled_h" and width to "pooled_w", matching the forward
  // adapter (roi_align_rotated_forward_npu). The previous code passed them
  // swapped, which only happens to work when the pooled window is square.
  // NOTE(review): confirm against the RoiAlignRotatedGrad operator spec that
  // the swap was not a deliberate workaround.
  cmd.Name("RoiAlignRotatedGrad")
      .Input(top_grad)
      .Input(rois)
      .Output(bottom_grad)
      .Attr("y_grad_shape", y_grad_shape)
      .Attr("pooled_h", aligned_height_64)
      .Attr("pooled_w", aligned_width_64)
      .Attr("spatial_scale", spatial_scale)
      .Attr("sampling_ratio", sampling_ratio_64)
      .Attr("aligned", aligned)
      .Attr("clockwise", clockwise)
      .Run();
}
51+
52+
void roi_align_rotated_forward_impl(Tensor input, Tensor rois, Tensor output,
53+
int aligned_height, int aligned_width,
54+
float spatial_scale, int sampling_ratio,
55+
bool aligned, bool clockwise);
56+
57+
void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
58+
Tensor bottom_grad, int aligned_height,
59+
int aligned_width, float spatial_scale,
60+
int sampling_ratio, bool aligned,
61+
bool clockwise);
62+
63+
REGISTER_NPU_IMPL(roi_align_rotated_forward_impl,
64+
roi_align_rotated_forward_npu);
65+
REGISTER_NPU_IMPL(roi_align_rotated_backward_impl,
66+
roi_align_rotated_backward_npu);

mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,29 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
5050
int64_t pooled_height_64 = pooled_height;
5151
int64_t pooled_width_64 = pooled_width;
5252
int64_t pooled_channel = 1;
53+
at::Tensor argmax_trans = argmax.transpose(1, 2).transpose(2, 3);
54+
at::Tensor grad_output_trans = grad_output.transpose(1, 2).transpose(2, 3);
5355
at::Tensor roi_actual_num =
5456
at::empty_like(rois, rois.options().dtype(at::kInt));
55-
at::Tensor x = at::ones_like(grad_input);
57+
at::Tensor x = at::ones_like(grad_input).transpose(1, 2).transpose(2, 3);
58+
at::Tensor y = at::zeros_like(x);
5659
OpCommand cmd;
5760
cmd.Name("RoiPoolingGradWithArgMax")
58-
.Input(grad_output)
61+
.Input(grad_output_trans)
5962
.Input(x)
6063
.Input(rois)
6164
.Input(roi_actual_num)
62-
.Input(argmax)
63-
.Output(grad_input)
65+
.Input(argmax_trans)
66+
.Output(y)
6467
.Attr("pooled_h", pooled_height_64)
6568
.Attr("pooled_w", pooled_width_64)
6669
.Attr("spatial_scale_h", spatial_scale)
6770
.Attr("spatial_scale_w", spatial_scale)
6871
.Attr("pool_channel", pooled_channel)
6972
.Run();
73+
at::Tensor result = y.transpose(2, 3).transpose(1, 2);
74+
at::Tensor res = NpuUtils::format_contiguous(result);
75+
grad_input.copy_(res);
7076
}
7177

7278
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,

0 commit comments

Comments
 (0)