diff --git a/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp b/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp
index 104b0d26c3..2464edb93b 100644
--- a/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp
@@ -5,12 +5,13 @@ using namespace std;
 
 void assign_score_withk_forward_npu(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers, const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
+  // Permute points and centers with {0, 3, 1, 2} before the aclnn call.
   at::Tensor points_trans = points.permute({0, 3, 1, 2});
   at::Tensor centers_trans = centers.permute({0, 3, 1, 2});
   EXEC_NPU_CMD(aclnnAssignScoreWithk, points_trans, centers_trans, scores,
                knn_idx, B, N0, N1, M, K, O, aggregate, output);
 }
 
 void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
@@ -19,5 +20,32 @@ void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
                                      const Tensor& scores,
                                      const Tensor& knn_idx, Tensor& output);
 
 REGISTER_NPU_IMPL(assign_score_withk_forward_impl,
                   assign_score_withk_forward_npu);
+
+// NPU backward for assign_score_withk: grad_out is permuted with {0, 2, 3, 1}
+// and handed, together with the forward inputs, to aclnnAssignScoreWithkGrad,
+// which receives grad_scores, grad_points and grad_centers as trailing args.
+void assign_score_withk_backward_npu(
+    int B, int N0, int N1, int M, int K, int O, int aggregate,
+    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
+    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
+    Tensor& grad_centers, Tensor& grad_scores) {
+  at::Tensor grad_out_trans = grad_out.permute({0, 2, 3, 1});
+
+  // NOTE: the call passes the gradients as (scores, points, centers), which
+  // differs from this function's (points, centers, scores) parameter order.
+  EXEC_NPU_CMD(aclnnAssignScoreWithkGrad, grad_out_trans, points, centers,
+               scores, knn_idx, B, N0, N1, M, K, O, aggregate, grad_scores,
+               grad_points, grad_centers);
+}
+
+// Declared here so the NPU function can be registered against it below.
+void assign_score_withk_backward_impl(
+    int B, int N0, int N1, int M, int K, int O, int aggregate,
+    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
+    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
+    Tensor& grad_centers, Tensor& grad_scores);
+
+REGISTER_NPU_IMPL(assign_score_withk_backward_impl,
+                  assign_score_withk_backward_npu);
diff --git a/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp b/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp
index eb937d50fa..5b92305d16 100644
--- a/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/pixel_group_npu.cpp
@@ -52,4 +52,4 @@ vector<vector<float>> pixel_group_impl(Tensor score, Tensor mask, Tensor embeddi
                                        Tensor kernel_label, Tensor kernel_contour,
                                        int kernel_region_num, float distance_threshold);
 
-REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu);
\ No newline at end of file
+REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu);
diff --git a/tests/test_ops/test_assign_score_withk.py b/tests/test_ops/test_assign_score_withk.py
index bd27144545..8c53cea2c7 100644
--- a/tests/test_ops/test_assign_score_withk.py
+++ b/tests/test_ops/test_assign_score_withk.py
@@ -126,77 +126,75 @@ def test_paconv_assign_scores(device):
     assert torch.allclose(output.detach().cpu(), expected_output, atol=1e-6)
 
     # test backward
-    if device == 'cuda':
-        loss = output.sum()
-        loss.backward()
-        expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683],
-                                               [-0.78873926, 0.7485497],
-                                               [-0.6866992, 0.05346543],
-                                               [0.04288036, -0.18217683]],
-                                              [[-1.1407862, 0.13533896],
-                                               [-0.06964391, -0.22948086],
-                                               [-1.1407862, 0.13533896],
-                                               [-0.06964391, -0.22948086]]],
-                                             [[[-0.3363995, -2.212181],
-                                               [-1.1589496, -2.7724311],
-                                               [-0.9387654, -1.3163853],
-                                               [-1.4385346, -1.0614843]],
-                                              [[-0.5048497, 1.4143617],
-                                               [-0.47332114, 0.6017133],
-                                               [-0.30974793, 1.1995442],
-                                               [-0.5048497,
-                                                1.4143617]]]]).float()
-        expected_points_grad = torch.tensor(
-            [[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0.15585709, 0.15585709, 0.15585709, 0.15585709],
-               [1.1893613, 1.1893613, 1.1893613, 1.1893613]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[1.6530733, 1.6530733, 1.6530733, 1.6530733],
-               [1.8130021, 1.8130021, 1.8130021, 1.8130021]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0.58863074, 0.58863074, 0.58863074, 0.58863074],
-               [1.3727596, 1.3727596, 1.3727596, 1.3727596]],
-              [[0.28462553, 0.28462553, 0.28462553, 0.28462553],
-               [0.8378516, 0.8378516, 0.8378516, 0.8378516]]],
-             [[[0.13817799, 0.13817799, 0.13817799, 0.13817799],
-               [0.34856772, 0.34856772, 0.34856772, 0.34856772]],
-              [[0.7405102, 0.7405102, 0.7405102, 0.7405102],
-               [0.06438422, 0.06438422, 0.06438422, 0.06438422]],
-              [[0.8491963, 0.8491963, 0.8491963, 0.8491963],
-               [1.1301711, 1.1301711, 1.1301711, 1.1301711]],
-              [[0.6887394, 0.6887394, 0.6887394, 0.6887394],
-               [0.22089851, 0.22089851, 0.22089851, 0.22089851]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0.605832, 0.605832, 0.605832, 0.605832],
-               [0.92364264, 0.92364264, 0.92364264, 0.92364264]],
-              [[0.23089725, 0.23089725, 0.23089725, 0.23089725],
-               [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float()
-        expected_centers_grad = torch.tensor(
-            [[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[-1.0493311, -1.0493311, -1.0493311, -1.0493311],
-               [-2.0301602, -2.0301602, -2.0301602, -2.0301602]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[-1.6328557, -1.6328557, -1.6328557, -1.6328557],
-               [-3.1828144, -3.1828144, -3.1828144, -3.1828144]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]]],
-             [[[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[0., 0., 0., 0.], [0., 0., 0., 0.]],
-              [[-1.5429721, -1.5429721, -1.5429721, -1.5429721],
-               [-1.6100934, -1.6100934, -1.6100934, -1.6100934]],
-              [[-1.7103812, -1.7103812, -1.7103812, -1.7103812],
-               [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float()
-        assert torch.allclose(
-            scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6)
-        assert torch.allclose(
-            points.grad.detach().cpu(), expected_points_grad, atol=1e-6)
-        assert torch.allclose(
-            centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6)
+    loss = output.sum()
+    loss.backward()
+    expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683],
+                                           [-0.78873926, 0.7485497],
+                                           [-0.6866992, 0.05346543],
+                                           [0.04288036, -0.18217683]],
+                                          [[-1.1407862, 0.13533896],
+                                           [-0.06964391, -0.22948086],
+                                           [-1.1407862, 0.13533896],
+                                           [-0.06964391, -0.22948086]]],
+                                         [[[-0.3363995, -2.212181],
+                                           [-1.1589496, -2.7724311],
+                                           [-0.9387654, -1.3163853],
+                                           [-1.4385346, -1.0614843]],
+                                          [[-0.5048497, 1.4143617],
+                                           [-0.47332114, 0.6017133],
+                                           [-0.30974793, 1.1995442],
+                                           [-0.5048497, 1.4143617]]]]).float()
+    expected_points_grad = torch.tensor(
+        [[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0.15585709, 0.15585709, 0.15585709, 0.15585709],
+           [1.1893613, 1.1893613, 1.1893613, 1.1893613]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[1.6530733, 1.6530733, 1.6530733, 1.6530733],
+           [1.8130021, 1.8130021, 1.8130021, 1.8130021]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0.58863074, 0.58863074, 0.58863074, 0.58863074],
+           [1.3727596, 1.3727596, 1.3727596, 1.3727596]],
+          [[0.28462553, 0.28462553, 0.28462553, 0.28462553],
+           [0.8378516, 0.8378516, 0.8378516, 0.8378516]]],
+         [[[0.13817799, 0.13817799, 0.13817799, 0.13817799],
+           [0.34856772, 0.34856772, 0.34856772, 0.34856772]],
+          [[0.7405102, 0.7405102, 0.7405102, 0.7405102],
+           [0.06438422, 0.06438422, 0.06438422, 0.06438422]],
+          [[0.8491963, 0.8491963, 0.8491963, 0.8491963],
+           [1.1301711, 1.1301711, 1.1301711, 1.1301711]],
+          [[0.6887394, 0.6887394, 0.6887394, 0.6887394],
+           [0.22089851, 0.22089851, 0.22089851, 0.22089851]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0.605832, 0.605832, 0.605832, 0.605832],
+           [0.92364264, 0.92364264, 0.92364264, 0.92364264]],
+          [[0.23089725, 0.23089725, 0.23089725, 0.23089725],
+           [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float()
+    expected_centers_grad = torch.tensor(
+        [[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[-1.0493311, -1.0493311, -1.0493311, -1.0493311],
+           [-2.0301602, -2.0301602, -2.0301602, -2.0301602]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[-1.6328557, -1.6328557, -1.6328557, -1.6328557],
+           [-3.1828144, -3.1828144, -3.1828144, -3.1828144]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]]],
+         [[[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
+          [[-1.5429721, -1.5429721, -1.5429721, -1.5429721],
+           [-1.6100934, -1.6100934, -1.6100934, -1.6100934]],
+          [[-1.7103812, -1.7103812, -1.7103812, -1.7103812],
+           [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float()
+    assert torch.allclose(
+        scores.grad.detach().cpu(), expected_scores_grad, atol=1e-6)
+    assert torch.allclose(
+        points.grad.detach().cpu(), expected_points_grad, atol=1e-6)
+    assert torch.allclose(
+        centers.grad.detach().cpu(), expected_centers_grad, atol=1e-6)