diff --git a/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp b/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp index 104b0d26c3..2464edb93b 100644 --- a/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp @@ -5,12 +5,13 @@ using namespace std; void assign_score_withk_forward_npu(int B, int N0, int N1, int M, int K, int O, int aggregate, const Tensor& points, - const Tensor& centers, const Tensor& scores, + const Tensor& centers, + const Tensor& scores, const Tensor& knn_idx, Tensor& output) { - at::Tensor points_trans = points.permute({0, 3, 1, 2}); - at::Tensor centers_trans = centers.permute({0, 3, 1, 2}); - EXEC_NPU_CMD(aclnnAssignScoreWithk, points_trans, centers_trans, scores, - knn_idx, B, N0, N1, M, K, O, aggregate, output); + at::Tensor points_trans = points.permute({0, 3, 1, 2}); + at::Tensor centers_trans = centers.permute({0, 3, 1, 2}); + + EXEC_NPU_CMD(aclnnAssignScoreWithk, points_trans, centers_trans, scores, knn_idx, B, N0, N1, M, K, O, aggregate, output); } void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, @@ -19,5 +20,24 @@ void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, const Tensor& scores, const Tensor& knn_idx, Tensor& output); -REGISTER_NPU_IMPL(assign_score_withk_forward_impl, - assign_score_withk_forward_npu); +REGISTER_NPU_IMPL(assign_score_withk_forward_impl, assign_score_withk_forward_npu); + + +void assign_score_withk_backward_npu( + int B, int N0, int N1, int M, int K, int O, int aggregate, + const Tensor& grad_out, const Tensor& points, const Tensor& centers, + const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, + Tensor& grad_centers, Tensor& grad_scores) { + + at::Tensor grad_out_trans = grad_out.permute({0, 2, 3, 1}); + + EXEC_NPU_CMD(aclnnAssignScoreWithkGrad, grad_out_trans, points, centers, scores, knn_idx, B, N0, N1, M, K, O, aggregate, grad_scores, grad_points, grad_centers); +} + 
+void assign_score_withk_backward_impl( + int B, int N0, int N1, int M, int K, int O, int aggregate, + const Tensor& grad_out, const Tensor& points, const Tensor& centers, + const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, + Tensor& grad_centers, Tensor& grad_scores); + +REGISTER_NPU_IMPL(assign_score_withk_backward_impl, assign_score_withk_backward_npu); diff --git a/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp b/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp index eb937d50fa..5b92305d16 100644 --- a/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp @@ -52,4 +52,4 @@ vector<vector<float>> pixel_group_impl(Tensor score, Tensor mask, Tensor embeddi Tensor kernel_label, Tensor kernel_contour, int kernel_region_num, float distance_threshold); -REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu); \ No newline at end of file +REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu); diff --git a/tests/test_ops/test_assign_score_withk.py b/tests/test_ops/test_assign_score_withk.py index bd27144545..8c53cea2c7 100644 --- a/tests/test_ops/test_assign_score_withk.py +++ b/tests/test_ops/test_assign_score_withk.py @@ -126,77 +126,75 @@ def test_paconv_assign_scores(device): assert torch.allclose(output.detach().cpu(), expected_output, atol=1e-6) # test backward - if device == 'cuda': - loss = output.sum() - loss.backward() - expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683], - [-0.78873926, 0.7485497], - [-0.6866992, 0.05346543], - [0.04288036, -0.18217683]], - [[-1.1407862, 0.13533896], - [-0.06964391, -0.22948086], - [-1.1407862, 0.13533896], - [-0.06964391, -0.22948086]]], - [[[-0.3363995, -2.212181], - [-1.1589496, -2.7724311], - [-0.9387654, -1.3163853], - [-1.4385346, -1.0614843]], - [[-0.5048497, 1.4143617], - [-0.47332114, 0.6017133], - [-0.30974793, 1.1995442], - [-0.5048497, - 1.4143617]]]]).float() - expected_points_grad = torch.tensor( - [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], 
[0., 0., 0., 0.]], - [[0.15585709, 0.15585709, 0.15585709, 0.15585709], - [1.1893613, 1.1893613, 1.1893613, 1.1893613]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[1.6530733, 1.6530733, 1.6530733, 1.6530733], - [1.8130021, 1.8130021, 1.8130021, 1.8130021]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0.58863074, 0.58863074, 0.58863074, 0.58863074], - [1.3727596, 1.3727596, 1.3727596, 1.3727596]], - [[0.28462553, 0.28462553, 0.28462553, 0.28462553], - [0.8378516, 0.8378516, 0.8378516, 0.8378516]]], - [[[0.13817799, 0.13817799, 0.13817799, 0.13817799], - [0.34856772, 0.34856772, 0.34856772, 0.34856772]], - [[0.7405102, 0.7405102, 0.7405102, 0.7405102], - [0.06438422, 0.06438422, 0.06438422, 0.06438422]], - [[0.8491963, 0.8491963, 0.8491963, 0.8491963], - [1.1301711, 1.1301711, 1.1301711, 1.1301711]], - [[0.6887394, 0.6887394, 0.6887394, 0.6887394], - [0.22089851, 0.22089851, 0.22089851, 0.22089851]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0.605832, 0.605832, 0.605832, 0.605832], - [0.92364264, 0.92364264, 0.92364264, 0.92364264]], - [[0.23089725, 0.23089725, 0.23089725, 0.23089725], - [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float() - expected_centers_grad = torch.tensor( - [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[-1.0493311, -1.0493311, -1.0493311, -1.0493311], - [-2.0301602, -2.0301602, -2.0301602, -2.0301602]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[-1.6328557, -1.6328557, -1.6328557, -1.6328557], - [-3.1828144, -3.1828144, -3.1828144, -3.1828144]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]]], - [[[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[0., 0., 0., 0.], [0., 0., 0., 0.]], - [[-1.5429721, -1.5429721, -1.5429721, -1.5429721], - 
[-1.6100934, -1.6100934, -1.6100934, -1.6100934]], - [[-1.7103812, -1.7103812, -1.7103812, -1.7103812], - [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float() - assert torch.allclose( - scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6) - assert torch.allclose( - points.grad.detach().cpu(), expected_points_grad, atol=1e-6) - assert torch.allclose( - centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6) + loss = output.sum() + loss.backward() + expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683], + [-0.78873926, 0.7485497], + [-0.6866992, 0.05346543], + [0.04288036, -0.18217683]], + [[-1.1407862, 0.13533896], + [-0.06964391, -0.22948086], + [-1.1407862, 0.13533896], + [-0.06964391, -0.22948086]]], + [[[-0.3363995, -2.212181], + [-1.1589496, -2.7724311], + [-0.9387654, -1.3163853], + [-1.4385346, -1.0614843]], + [[-0.5048497, 1.4143617], + [-0.47332114, 0.6017133], + [-0.30974793, 1.1995442], + [-0.5048497, 1.4143617]]]]).float() + expected_points_grad = torch.tensor( + [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0.15585709, 0.15585709, 0.15585709, 0.15585709], + [1.1893613, 1.1893613, 1.1893613, 1.1893613]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[1.6530733, 1.6530733, 1.6530733, 1.6530733], + [1.8130021, 1.8130021, 1.8130021, 1.8130021]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0.58863074, 0.58863074, 0.58863074, 0.58863074], + [1.3727596, 1.3727596, 1.3727596, 1.3727596]], + [[0.28462553, 0.28462553, 0.28462553, 0.28462553], + [0.8378516, 0.8378516, 0.8378516, 0.8378516]]], + [[[0.13817799, 0.13817799, 0.13817799, 0.13817799], + [0.34856772, 0.34856772, 0.34856772, 0.34856772]], + [[0.7405102, 0.7405102, 0.7405102, 0.7405102], + [0.06438422, 0.06438422, 0.06438422, 0.06438422]], + [[0.8491963, 0.8491963, 0.8491963, 0.8491963], + [1.1301711, 1.1301711, 1.1301711, 1.1301711]], + [[0.6887394, 0.6887394, 0.6887394, 0.6887394], + [0.22089851, 0.22089851, 0.22089851, 0.22089851]], + 
[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0.605832, 0.605832, 0.605832, 0.605832], + [0.92364264, 0.92364264, 0.92364264, 0.92364264]], + [[0.23089725, 0.23089725, 0.23089725, 0.23089725], + [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float() + expected_centers_grad = torch.tensor( + [[[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[-1.0493311, -1.0493311, -1.0493311, -1.0493311], + [-2.0301602, -2.0301602, -2.0301602, -2.0301602]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[-1.6328557, -1.6328557, -1.6328557, -1.6328557], + [-3.1828144, -3.1828144, -3.1828144, -3.1828144]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]]], + [[[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.]], + [[-1.5429721, -1.5429721, -1.5429721, -1.5429721], + [-1.6100934, -1.6100934, -1.6100934, -1.6100934]], + [[-1.7103812, -1.7103812, -1.7103812, -1.7103812], + [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float() + assert torch.allclose( + scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6) + assert torch.allclose( + points.grad.detach().cpu(), expected_points_grad, atol=1e-6) + assert torch.allclose( + centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6)