Skip to content

Commit 3e5a5a9

Browse files
authored
refactor: refactor the allocation of kvcache. (#1293)
1 parent ec0c913 commit 3e5a5a9

35 files changed

Lines changed: 1114 additions & 416 deletions

xllm/core/distributed_runtime/spawn_worker_server/CMakeLists.txt

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,7 @@ cc_binary(
99
spawn_worker_server.cpp
1010
spawn_worker_server_process.cpp
1111
DEPS
12-
:models
13-
:model
1412
:distributed_runtime
15-
absl::strings
16-
$<$<BOOL:${USE_NPU}>:xllm_atb_layers>
17-
$<$<BOOL:${USE_NPU}>:ascendcl>
18-
$<$<BOOL:${USE_NPU}>:nnopbase>
19-
$<$<BOOL:${USE_NPU}>:atb>
20-
$<$<BOOL:${USE_NPU}>:atb_customize>
21-
$<$<BOOL:${USE_NPU}>:c_sec>
22-
spdlog::spdlog
2313
)
2414

2515
add_dependencies(export_module spawn_worker)

xllm/core/framework/batch/batch_test.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,12 +312,12 @@ TEST(BatchTest, KVCacheEmptySupportsLinearOnlyAndFullOnlyLayouts) {
312312
auto conv_cache = torch::zeros({2, 4, 3}, options);
313313
auto ssm_cache = torch::zeros({2, 1, 4, 4}, options);
314314
KVCache linear_only_cache(
315-
torch::Tensor(), torch::Tensor(), conv_cache, ssm_cache);
315+
LinearAttentionKVCacheTensors{conv_cache, ssm_cache});
316316
EXPECT_FALSE(linear_only_cache.empty());
317317

318318
auto key_cache = torch::zeros({2, 4, 1, 8}, options);
319319
auto value_cache = torch::zeros({2, 4, 1, 8}, options);
320-
KVCache full_only_cache(key_cache, value_cache);
320+
KVCache full_only_cache(KVCacheTensors{key_cache, value_cache});
321321
EXPECT_FALSE(full_only_cache.empty());
322322

323323
KVCache empty_cache;

xllm/core/framework/eplb/eplb_policy_test.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@ limitations under the License.
1919
#include <gtest/gtest.h>
2020
#include <torch/torch.h>
2121

22+
#include "platform/device.h"
23+
2224
namespace xllm {
2325

2426
TEST(EplbPolicyTest, Build) {
27+
// use init device to trigger the loading of torch backend for different
28+
// devices
29+
// since the allocation of pinned memory on cpu is still backend-dependent.
30+
torch::Device device(Device::type_torch(), 0);
2531
std::string rank_table_file;
2632
EplbPolicy eplb_policy(5, 4, 1);
2733
std::vector<torch::Tensor> tensors;

xllm/core/framework/kv_cache/CMakeLists.txt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
include(cc_binary)
21
include(cc_library)
32
include(cc_test)
43

@@ -8,13 +7,24 @@ cc_library(
87
kv_cache
98
HDRS
109
embedding_cache.h
10+
indexed_kv_cache_impl.h
1111
kv_cache.h
1212
kv_cache_event.h
13+
kv_cache_impl.h
14+
kv_cache_utils.h
15+
linear_attention_kv_cache_impl.h
16+
quantized_kv_cache_impl.h
1317
SRCS
1418
embedding_cache.cpp
19+
indexed_kv_cache_impl.cpp
1520
kv_cache.cpp
21+
kv_cache_impl.cpp
22+
kv_cache_utils.cpp
23+
linear_attention_kv_cache_impl.cpp
24+
quantized_kv_cache_impl.cpp
1625
DEPS
1726
:common
27+
:xtensor
1828
glog::glog
1929
torch
2030
$<$<BOOL:${USE_NPU}>:torch_npu>
@@ -26,9 +36,7 @@ cc_test(
2636
SRCS
2737
embedding_cache_test.cpp
2838
DEPS
29-
:xllm_server
3039
:kv_cache
31-
$<$<BOOL:${USE_NPU}>:xllm_server>
3240
GTest::gtest_main
3341
)
3442
target_link_libraries(embedding_cache_test

xllm/core/framework/kv_cache/embedding_cache_test.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717

1818
#include <gtest/gtest.h>
1919

20+
#include "platform/device.h"
21+
2022
namespace xllm {
2123

2224
namespace {
@@ -28,6 +30,10 @@ bool tensor_equal(const torch::Tensor& lhs, const torch::Tensor& rhs) {
2830
} // namespace
2931

3032
TEST(EmbeddingCacheTest, WriteAndClear) {
33+
// use init device to trigger the loading of torch backend for different
34+
// devices
35+
// since the allocation of pinned memory on cpu is still backend-dependent.
36+
torch::Device device(Device::type_torch(), 0);
3137
EmbeddingCache cache(/*total_nums=*/4);
3238

3339
std::vector<int32_t> ids = {3, 2};
@@ -57,6 +63,10 @@ TEST(EmbeddingCacheTest, WriteAndClear) {
5763
}
5864

5965
TEST(EmbeddingCacheTest, WriteSelectedOnlyProbs) {
66+
// use init device to trigger the loading of torch backend for different
67+
// devices
68+
// since the allocation of pinned memory on cpu is still backend-dependent.
69+
torch::Device device(Device::type_torch(), 0);
6070
EmbeddingCache cache(/*total_nums=*/2);
6171
std::vector<int32_t> ids = {0, 1};
6272
auto cached_tokens = torch::tensor({11, 12}, torch::kInt);
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/* Copyright 2026 The xLLM Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
https://github.com/jd-opensource/xllm/blob/main/LICENSE
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "framework/kv_cache/indexed_kv_cache_impl.h"
17+
18+
#include "util/tensor_helper.h"
19+
20+
namespace xllm {
21+
22+
IndexedKVCacheImpl::IndexedKVCacheImpl(const IndexedKVCacheTensors& tensors)
23+
: KVCacheImpl(tensors.kv_cache_tensors),
24+
index_cache_(tensors.index_cache) {}
25+
26+
IndexedKVCacheImpl::IndexedKVCacheImpl(
27+
const std::vector<std::vector<int64_t>>& kv_cache_shape,
28+
const KVCacheCreateOptions& create_options)
29+
: IndexedKVCacheImpl(
30+
create_indexed_kv_cache_tensors(kv_cache_shape, create_options)) {}
31+
32+
torch::Tensor IndexedKVCacheImpl::get_index_cache() const {
33+
return index_cache_;
34+
}
35+
36+
bool IndexedKVCacheImpl::empty() const {
37+
return !key_cache_.defined() || !value_cache_.defined() ||
38+
!index_cache_.defined();
39+
}
40+
41+
std::vector<std::vector<int64_t>> IndexedKVCacheImpl::get_shapes() const {
42+
std::vector<std::vector<int64_t>> tensor_shapes(3);
43+
tensor_shapes[0] = get_tensor_shape(key_cache_);
44+
tensor_shapes[1] = get_tensor_shape(value_cache_);
45+
tensor_shapes[2] = get_tensor_shape(index_cache_);
46+
return tensor_shapes;
47+
}
48+
49+
} // namespace xllm
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/* Copyright 2026 The xLLM Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
https://github.com/jd-opensource/xllm/blob/main/LICENSE
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#pragma once
17+
18+
#include "framework/kv_cache/kv_cache_impl.h"
19+
20+
namespace xllm {
21+
22+
class IndexedKVCacheImpl final : public KVCacheImpl {
23+
public:
24+
explicit IndexedKVCacheImpl(const IndexedKVCacheTensors& tensors);
25+
IndexedKVCacheImpl(const std::vector<std::vector<int64_t>>& kv_cache_shape,
26+
const KVCacheCreateOptions& create_options);
27+
28+
torch::Tensor get_index_cache() const override;
29+
30+
bool empty() const override;
31+
32+
std::vector<std::vector<int64_t>> get_shapes() const override;
33+
34+
void swap_blocks(torch::Tensor& src_tensor,
35+
torch::Tensor& dst_tensor) override {
36+
NOT_IMPLEMENTED();
37+
};
38+
39+
private:
40+
torch::Tensor index_cache_;
41+
};
42+
43+
} // namespace xllm

0 commit comments

Comments
 (0)