bugfix: resolve data race between async kv cache event recording and the double-buffer swap in kv cache events reporting.

Kang-Meng · Kang-Meng · commit 5d08f1c9fa54 · 2025-12-15T22:22:27.000+08:00
diff --git a/xllm/core/framework/prefix_cache/prefix_cache.cpp b/xllm/core/framework/prefix_cache/prefix_cache.cpp
@@ -124,6 +124,11 @@ size_t PrefixCache::insert(const Slice<int32_t>& token_ids,
   return insert(token_ids, blocks, &insert_keys);
 }
 
+size_t PrefixCache::insert(const std::vector<Block>& blocks) {
+  std::vector<Murmur3Key> insert_keys;
+  return insert(blocks, &insert_keys);
+}
+
 size_t PrefixCache::evict(size_t n_blocks) {
   std::vector<Murmur3Key> evict_keys;
   return evict(n_blocks, &evict_keys);
@@ -192,11 +197,13 @@ size_t PrefixCache::insert(const Slice<int32_t>& token_ids,
   return n_tokens;
 }
 
-size_t PrefixCache::insert(const std::vector<Block>& blocks) {
+size_t PrefixCache::insert(const std::vector<Block>& blocks,
+                           std::vector<Murmur3Key>* insert_keys) {
   const int64_t now = absl::ToUnixMicros(absl::Now());
   DNodeList node_list;
   Murmur3Key token_hash_key;
 
+  insert_keys->reserve(blocks.size());
   for (size_t i = 0; i < blocks.size(); i++) {
     if (!blocks[i].is_valid()) {
       continue;
@@ -220,6 +227,8 @@ size_t PrefixCache::insert(const std::vector<Block>& blocks) {
       cached_blocks_.emplace(std::make_pair(token_hash_key, new_node));
 
       num_blocks_++;
+
+      insert_keys->emplace_back(token_hash_key.data);
     }
   }
 
diff --git a/xllm/core/framework/prefix_cache/prefix_cache.h b/xllm/core/framework/prefix_cache/prefix_cache.h
@@ -66,8 +66,13 @@ class PrefixCache {
       const Slice<int32_t>& token_ids,
       const Slice<Block>& existed_shared_blocks = {});
 
+  // insert the token ids and blocks into the prefix tree
+  // and set the hash key on each corresponding block
+  // return the number of newly inserted tokens
   virtual size_t insert(const Slice<int32_t>& token_ids,
                         std::vector<Block>& blocks);
+
+  // insert the blocks (each with its hash key) into the prefix tree
   virtual size_t insert(const std::vector<Block>& blocks);
 
   // evict blocks hold by the prefix cache
@@ -98,6 +103,10 @@ class PrefixCache {
   size_t insert(const Slice<int32_t>& token_ids,
                 std::vector<Block>& blocks,
                 std::vector<Murmur3Key>* insert_keys);
+
+  size_t insert(const std::vector<Block>& blocks,
+                std::vector<Murmur3Key>* insert_keys);
+
   size_t evict(size_t n_blocks, std::vector<Murmur3Key>* evict_keys);
 
   struct Node {
diff --git a/xllm/core/framework/prefix_cache/prefix_cache_with_upload.cpp b/xllm/core/framework/prefix_cache/prefix_cache_with_upload.cpp
@@ -35,47 +35,54 @@ size_t PrefixCacheWithUpload::insert(const Slice<int32_t>& token_ids,
                                      std::vector<Block>& blocks) {
   std::vector<Murmur3Key> insert_keys;
   auto n_tokens = PrefixCache::insert(token_ids, blocks, &insert_keys);
+  save_event_async(true, insert_keys);
+  return n_tokens;
+}
 
-  threadpool_.schedule([insert_keys = std::move(insert_keys), this]() {
-    auto front_ptr = this->db_kvcache_events_.get_front_value();
-    if (!front_ptr) {
-      LOG(INFO) << "Front DoubleBufferKvCacheEvent is nullptr!";
-      return;
-    }
-    if (!this->exited_.load()) {
-      for (const auto& hash_id : insert_keys) {
-        front_ptr->removed_cache.erase(hash_id);
-        front_ptr->stored_cache.insert(hash_id);
-      }
-    }
-  });
-
+size_t PrefixCacheWithUpload::insert(const std::vector<Block>& blocks) {
+  std::vector<Murmur3Key> insert_keys;
+  auto n_tokens = PrefixCache::insert(blocks, &insert_keys);
+  save_event_async(true, insert_keys);
   return n_tokens;
 }
 
 size_t PrefixCacheWithUpload::evict(size_t n_blocks) {
   std::vector<Murmur3Key> evict_keys;
   auto evict_count = PrefixCache::evict(n_blocks, &evict_keys);
+  save_event_async(false, evict_keys);
+  return evict_count;
+}
 
-  threadpool_.schedule([evict_keys = std::move(evict_keys), this]() {
+void PrefixCacheWithUpload::save_event_async(const bool is_insert,
+                                             std::vector<Murmur3Key>& keys) {
+  threadpool_.schedule([this, is_insert = is_insert, keys = std::move(keys)]() {
+    std::lock_guard<std::mutex> lock(this->mutex_);
     auto front_ptr = this->db_kvcache_events_.get_front_value();
     if (!front_ptr) {
       LOG(INFO) << "Front DoubleBufferKvCacheEvent is nullptr!";
       return;
     }
     if (!this->exited_.load()) {
-      for (const auto& hash_id : evict_keys) {
-        front_ptr->removed_cache.insert(hash_id);
-        front_ptr->stored_cache.erase(hash_id);
+      if (is_insert) {
+        for (const auto& hash_id : keys) {
+          front_ptr->removed_cache.erase(hash_id);
+          front_ptr->stored_cache.insert(hash_id);
+        }
+      } else {
+        for (const auto& hash_id : keys) {
+          front_ptr->removed_cache.insert(hash_id);
+          front_ptr->stored_cache.erase(hash_id);
+        }
       }
     }
   });
-
-  return evict_count;
 }
 
 KvCacheEvent* PrefixCacheWithUpload::get_upload_kvcache_events() {
-  db_kvcache_events_.swap();
+  {
+    std::lock_guard<std::mutex> lock(this->mutex_);
+    db_kvcache_events_.swap();
+  }
   if (!exited_.load()) {
     return db_kvcache_events_.get_back_value();
   } else {
diff --git a/xllm/core/framework/prefix_cache/prefix_cache_with_upload.h b/xllm/core/framework/prefix_cache/prefix_cache_with_upload.h
@@ -18,15 +18,22 @@ class PrefixCacheWithUpload final : public PrefixCache {
   size_t insert(const Slice<int32_t>& token_ids,
                 std::vector<Block>& blocks) override;
 
+  // insert the blocks (each with its hash key) into the prefix tree
+  size_t insert(const std::vector<Block>& blocks) override;
+
   // evict blocks hold by the prefix cache
   // return the actual number of evicted blocks
   size_t evict(size_t n_blocks) override;
 
   virtual KvCacheEvent* get_upload_kvcache_events() override;
 
+ private:
+  void save_event_async(const bool is_insert, std::vector<Murmur3Key>& keys);
+
  private:
   ThreadPool threadpool_;
 
+  std::mutex mutex_;
   DoubleBuffer<KvCacheEvent> db_kvcache_events_;
 };
 
diff --git a/xllm/xllm.cpp b/xllm/xllm.cpp
@@ -174,13 +174,13 @@ int run() {
       .reasoning_parser(FLAGS_reasoning_parser)
       .priority_strategy(FLAGS_priority_strategy)
       .enable_online_preempt_offline(FLAGS_enable_online_preempt_offline)
-      .enable_cache_upload(FLAGS_enable_prefix_cache &&
-                           FLAGS_enable_service_routing &&
-                           FLAGS_enable_cache_upload)
+      .enable_cache_upload(
+          (FLAGS_enable_service_routing || FLAGS_enable_disagg_pd) &&
+          FLAGS_enable_prefix_cache && FLAGS_enable_cache_upload)
       .host_blocks_factor(FLAGS_host_blocks_factor)
-      .enable_kvcache_store(FLAGS_enable_kvcache_store &&
-                            FLAGS_enable_prefix_cache &&
-                            (FLAGS_host_blocks_factor > 0.0))
+      .enable_kvcache_store(FLAGS_enable_prefix_cache &&
+                            FLAGS_enable_kvcache_store &&
+                            (FLAGS_host_blocks_factor > 1.0))
       .prefetch_timeout(FLAGS_prefetch_timeout)
       .prefetch_bacth_size(FLAGS_prefetch_bacth_size)
       .layers_wise_copy_batchs(FLAGS_layers_wise_copy_batchs)