From e0224fad5ebc0d1aec19002a0571388816ef7f99 Mon Sep 17 00:00:00 2001
From: pengtao
Date: Thu, 16 Apr 2026 17:03:09 +0800
Subject: [PATCH] feat: reuse preplanned ExecCfg in sparse MoE prep_in.

Signed-off-by: pengtao
---
 xllm/core/framework/xtensor/phy_page_pool.h   |  1 -
 xllm/core/framework/xtensor/xtensor.cpp       |  4 +--
 .../deepseek_v2_sparse_moe_block_tests.cpp    | 31 +++++++++++++++++++
 .../mlu/deepseek_v2_decoder_layer_impl.cpp    |  7 +++--
 .../mlu/deepseek_v2_sparse_moe_block.cpp      |  2 +-
 .../layers/mlu/deepseek_v2_sparse_moe_block.h |  1 +
 6 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/xllm/core/framework/xtensor/phy_page_pool.h b/xllm/core/framework/xtensor/phy_page_pool.h
index 50021cb5f..f2147474f 100644
--- a/xllm/core/framework/xtensor/phy_page_pool.h
+++ b/xllm/core/framework/xtensor/phy_page_pool.h
@@ -120,7 +120,6 @@ class PhyPagePool {
 
   // Track which pages are allocated (for segment management)
   std::vector<bool> page_allocated_;
-
 };
 
 }  // namespace xllm
diff --git a/xllm/core/framework/xtensor/xtensor.cpp b/xllm/core/framework/xtensor/xtensor.cpp
index 70c2e165c..b978928e4 100644
--- a/xllm/core/framework/xtensor/xtensor.cpp
+++ b/xllm/core/framework/xtensor/xtensor.cpp
@@ -83,8 +83,8 @@ static inline void unmap_pages(
   }
 
   for (const auto& entry : mapping) {
-    VirPtr addr = add_vir_ptr_offset(
-        vaddr, static_cast<size_t>(entry.first) * page_size);
+    VirPtr addr =
+        add_vir_ptr_offset(vaddr, static_cast<size_t>(entry.first) * page_size);
     vmm::unmap(addr, page_size);
   }
 }
diff --git a/xllm/core/layers/common/tests/deepseek_v2_sparse_moe_block_tests.cpp b/xllm/core/layers/common/tests/deepseek_v2_sparse_moe_block_tests.cpp
index 31aa43eda..cd0fef4d2 100644
--- a/xllm/core/layers/common/tests/deepseek_v2_sparse_moe_block_tests.cpp
+++ b/xllm/core/layers/common/tests/deepseek_v2_sparse_moe_block_tests.cpp
@@ -302,6 +302,7 @@ TEST_F(DeepseekV2SparseMoEBlockTest, PrepInDpGatherBuildsLocalSkip) {
   auto prep = block->prep_in(attn_out,
                              residual,
                              input_params,
+                             block->plan_exec(input_params),
                              DeepseekV2AttentionImpl::PostAttnLayout::kTpShard);
 
   EXPECT_TRUE(prep.need_dp_gather);
@@ -349,6 +350,7 @@ TEST_F(DeepseekV2SparseMoEBlockTest, PrepInAll2AllPadsTpShardInput) {
   auto prep = block->prep_in(attn_out,
                              residual,
                              input_params,
+                             block->plan_exec(input_params),
                              DeepseekV2AttentionImpl::PostAttnLayout::kTpShard);
 
   EXPECT_FALSE(prep.need_dp_gather);
@@ -361,6 +363,35 @@
   test::verify_tensor_close(prep.skip_local, prep.ffn_in);
 }
 
+TEST_F(DeepseekV2SparseMoEBlockTest, PrepInUsesProvidedExecCfg) {
+  set_tp_dp_ctx(/*world_size=*/4, /*dp_size=*/2, /*tp_size=*/2, /*ep_size=*/4);
+  auto block = create_block();
+
+  ModelInputParams input_params;
+  input_params.dp_global_token_nums = {3, 1};
+  input_params.dp_is_decode = {0, 0};
+  auto planned_cfg = block->plan_exec(input_params);
+  EXPECT_FALSE(planned_cfg.enable_all2all);
+  EXPECT_TRUE(planned_cfg.need_dp_gather);
+
+  DeepseekV2SparseMoEBlockImpl::ExecCfg forced_cfg;
+  forced_cfg.enable_all2all = true;
+  forced_cfg.need_dp_gather = false;
+  auto attn_out = mat(/*rows=*/3, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f});
+  auto residual = mat(/*rows=*/3, {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f});
+
+  auto prep = block->prep_in(attn_out,
+                             residual,
+                             input_params,
+                             forced_cfg,
+                             DeepseekV2AttentionImpl::PostAttnLayout::kTpShard);
+
+  EXPECT_FALSE(prep.need_dp_gather);
+  EXPECT_TRUE(prep.need_tp_pad);
+  EXPECT_TRUE(prep.pad_info.active);
+  test::verify_tensor_close(prep.skip_local, prep.ffn_in);
+}
+
 TEST_F(DeepseekV2SparseMoEBlockTest, MergeOutTpPadGathersAndUnpads) {
   set_tp_ctx(/*world_size=*/2, /*ep_size=*/2);
   auto block = create_block();
diff --git a/xllm/core/layers/mlu/deepseek_v2_decoder_layer_impl.cpp b/xllm/core/layers/mlu/deepseek_v2_decoder_layer_impl.cpp
index 9ef413095..cc11b268b 100644
--- a/xllm/core/layers/mlu/deepseek_v2_decoder_layer_impl.cpp
+++ b/xllm/core/layers/mlu/deepseek_v2_decoder_layer_impl.cpp
@@ -181,8 +181,11 @@ DeepseekV2DecoderLayerImpl::prepare_moe_inputs(
   }
 
   if (result.exec_cfg->enable_all2all || result.exec_cfg->need_dp_gather) {
-    result.moe_prep =
-        sparse_moe_->prep_in(std::move(x), residual, input_params, attn_layout);
+    result.moe_prep = sparse_moe_->prep_in(std::move(x),
+                                           residual,
+                                           input_params,
+                                           result.exec_cfg.value(),
+                                           attn_layout);
     result.ffn_in = result.moe_prep->ffn_in;
     return result;
   }
diff --git a/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.cpp b/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.cpp
index ff532840c..ffa3b304a 100644
--- a/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.cpp
+++ b/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.cpp
@@ -69,9 +69,9 @@ DeepseekV2SparseMoEBlockImpl::PrepOut DeepseekV2SparseMoEBlockImpl::prep_in(
     torch::Tensor x,
     const torch::Tensor& residual,
     const ModelInputParams& input_params,
+    const ExecCfg& exec,
    DeepseekV2AttentionImpl::PostAttnLayout attn_layout) const {
   PrepOut prep;
-  const ExecCfg exec = plan_exec(input_params);
 
   if (exec.enable_all2all) {
     auto shard = shard_attn_out(x,
diff --git a/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.h b/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.h
index 6db9a2bb5..99637536d 100644
--- a/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.h
+++ b/xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.h
@@ -77,6 +77,7 @@ class DeepseekV2SparseMoEBlockImpl : public torch::nn::Module {
   PrepOut prep_in(torch::Tensor x,
                   const torch::Tensor& residual,
                   const ModelInputParams& input_params,
+                  const ExecCfg& exec,
                   DeepseekV2AttentionImpl::PostAttnLayout attn_layout) const;
   torch::Tensor gather_in(const PrepOut& prep,
                           const ModelInputParams& input_params) const;
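
Usage sketch (illustration only, not part of the patch): after this change the caller owns the planning step, invoking plan_exec() once and threading the resulting ExecCfg into prep_in(), which no longer replans internally. The sketch below assumes DeepseekV2SparseMoEBlock is the module holder for DeepseekV2SparseMoEBlockImpl and that run_moe_prep is a hypothetical caller; plan_exec, prep_in, ExecCfg, and the enable_all2all/need_dp_gather fields are taken from the patch itself.

// Sketch of the post-patch call pattern; run_moe_prep is hypothetical and
// DeepseekV2SparseMoEBlock is assumed to be the torch::nn module holder
// for DeepseekV2SparseMoEBlockImpl.
#include "xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.h"

torch::Tensor run_moe_prep(DeepseekV2SparseMoEBlock& block,
                           torch::Tensor x,
                           const torch::Tensor& residual,
                           const ModelInputParams& input_params) {
  // Plan once per layer step; the same ExecCfg drives both the branch
  // below and the block's input preparation.
  const auto exec_cfg = block->plan_exec(input_params);

  if (exec_cfg.enable_all2all || exec_cfg.need_dp_gather) {
    // Reuse the preplanned config instead of letting prep_in() replan.
    auto prep = block->prep_in(std::move(x),
                               residual,
                               input_params,
                               exec_cfg,
                               DeepseekV2AttentionImpl::PostAttnLayout::kTpShard);
    return prep.ffn_in;
  }
  // Neither all2all dispatch nor DP gather is needed: feed x through as-is.
  return x;
}

Mirroring the decoder layer above, the caller branches on the planned config and only pays the prep_in() cost when all2all dispatch or a DP gather is actually required; the tests exercise the same contract by forcing a config that disagrees with plan_exec().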