Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion xllm/core/framework/xtensor/phy_page_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ class PhyPagePool {

// Track which pages are allocated (for segment management)
std::vector<bool> page_allocated_;

};

} // namespace xllm
4 changes: 2 additions & 2 deletions xllm/core/framework/xtensor/xtensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ static inline void unmap_pages(
}

for (const auto& entry : mapping) {
VirPtr addr = add_vir_ptr_offset(
vaddr, static_cast<size_t>(entry.first) * page_size);
VirPtr addr =
add_vir_ptr_offset(vaddr, static_cast<size_t>(entry.first) * page_size);
vmm::unmap(addr, page_size);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ TEST_F(DeepseekV2SparseMoEBlockTest, PrepInDpGatherBuildsLocalSkip) {
auto prep = block->prep_in(attn_out,
residual,
input_params,
block->plan_exec(input_params),
DeepseekV2AttentionImpl::PostAttnLayout::kTpShard);

EXPECT_TRUE(prep.need_dp_gather);
Expand Down Expand Up @@ -349,6 +350,7 @@ TEST_F(DeepseekV2SparseMoEBlockTest, PrepInAll2AllPadsTpShardInput) {
auto prep = block->prep_in(attn_out,
residual,
input_params,
block->plan_exec(input_params),
DeepseekV2AttentionImpl::PostAttnLayout::kTpShard);

EXPECT_FALSE(prep.need_dp_gather);
Expand All @@ -361,6 +363,35 @@ TEST_F(DeepseekV2SparseMoEBlockTest, PrepInAll2AllPadsTpShardInput) {
test::verify_tensor_close(prep.skip_local, prep.ffn_in);
}

// Verifies that prep_in() honors a caller-supplied ExecCfg instead of
// re-planning internally: the forced cfg deliberately contradicts what
// plan_exec() would choose for these inputs, and the output must follow
// the forced cfg.
TEST_F(DeepseekV2SparseMoEBlockTest, PrepInUsesProvidedExecCfg) {
set_tp_dp_ctx(/*world_size=*/4, /*dp_size=*/2, /*tp_size=*/2, /*ep_size=*/4);
auto block = create_block();

// Uneven per-DP-rank token counts (3 vs 1); with both ranks in prefill
// (dp_is_decode all zero) the planner picks dp-gather, not all2all.
ModelInputParams input_params;
input_params.dp_global_token_nums = {3, 1};
input_params.dp_is_decode = {0, 0};
// Sanity-check the planner's choice so the forced cfg below is provably
// the opposite of what prep_in() would have planned on its own.
auto planned_cfg = block->plan_exec(input_params);
EXPECT_FALSE(planned_cfg.enable_all2all);
EXPECT_TRUE(planned_cfg.need_dp_gather);

// Forced cfg inverts both planner decisions.
DeepseekV2SparseMoEBlockImpl::ExecCfg forced_cfg;
forced_cfg.enable_all2all = true;
forced_cfg.need_dp_gather = false;
// 3 tokens x 2 hidden dims for both the attention output and the residual.
auto attn_out = mat(/*rows=*/3, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f});
auto residual = mat(/*rows=*/3, {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f});

auto prep = block->prep_in(attn_out,
residual,
input_params,
forced_cfg,
DeepseekV2AttentionImpl::PostAttnLayout::kTpShard);

// Output reflects the forced cfg (no dp-gather), not the planned one.
EXPECT_FALSE(prep.need_dp_gather);
// NOTE(review): the all2all path with a kTpShard layout appears to require
// TP padding here (pad_info.active) — presumably because 3 tokens don't
// split evenly across tp_size=2; confirm against prep_in's shard logic.
EXPECT_TRUE(prep.need_tp_pad);
EXPECT_TRUE(prep.pad_info.active);
// On this path the local skip connection equals the FFN input tensor.
test::verify_tensor_close(prep.skip_local, prep.ffn_in);
}

TEST_F(DeepseekV2SparseMoEBlockTest, MergeOutTpPadGathersAndUnpads) {
set_tp_ctx(/*world_size=*/2, /*ep_size=*/2);
auto block = create_block();
Expand Down
7 changes: 5 additions & 2 deletions xllm/core/layers/mlu/deepseek_v2_decoder_layer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,11 @@ DeepseekV2DecoderLayerImpl::prepare_moe_inputs(
}

if (result.exec_cfg->enable_all2all || result.exec_cfg->need_dp_gather) {
result.moe_prep =
sparse_moe_->prep_in(std::move(x), residual, input_params, attn_layout);
result.moe_prep = sparse_moe_->prep_in(std::move(x),
residual,
input_params,
result.exec_cfg.value(),
attn_layout);
result.ffn_in = result.moe_prep->ffn_in;
return result;
}
Expand Down
2 changes: 1 addition & 1 deletion xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ DeepseekV2SparseMoEBlockImpl::PrepOut DeepseekV2SparseMoEBlockImpl::prep_in(
torch::Tensor x,
const torch::Tensor& residual,
const ModelInputParams& input_params,
const ExecCfg& exec,
DeepseekV2AttentionImpl::PostAttnLayout attn_layout) const {
PrepOut prep;
const ExecCfg exec = plan_exec(input_params);
if (exec.enable_all2all) {
auto shard =
shard_attn_out(x,
Expand Down
1 change: 1 addition & 0 deletions xllm/core/layers/mlu/deepseek_v2_sparse_moe_block.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class DeepseekV2SparseMoEBlockImpl : public torch::nn::Module {
PrepOut prep_in(torch::Tensor x,
const torch::Tensor& residual,
const ModelInputParams& input_params,
const ExecCfg& exec,
DeepseekV2AttentionImpl::PostAttnLayout attn_layout) const;
torch::Tensor gather_in(const PrepOut& prep,
const ModelInputParams& input_params) const;
Expand Down
Loading