Fix block_size initialization in KVCacheManager

Minsung-commit · Minsung-commit · commit 789f1e81abeb · 2025-12-05T08:35:11.000+09:00
Use the actual KV cache block size from kv_cache_config instead of hash_block_size. **Issue**: The previous implementation incorrectly used `hash_block_size` for token metrics calculation. `hash_block_size` is the hashing granularity, not the actual KV cache block size used by BlockPool. **Fix**: Initialize `self.block_size` from `kv_cache_config.kv_cache_groups[0].kv_cache_spec.block_size`, which represents the actual block size used for token storage. This is done only when `enable_caching` is true (otherwise `self.block_size` stays `None`), and the code asserts that all KV cache groups share a single block size. **Impact**: This ensures token-level metrics (total_tokens, used_tokens, free_tokens) accurately reflect the real KV cache capacity, especially for models using larger block sizes than the hash granularity. Addresses bot review feedback on PR vllm-project#29836. Signed-off-by: Minsung-commit <dialstjd931203@gmail.com>
diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
@@ -106,9 +106,22 @@ def __init__(
         metrics_collector: KVCacheMetricsCollector | None = None,
     ) -> None:
         self.max_model_len = max_model_len
-        self.block_size = hash_block_size
 
         self.enable_caching = enable_caching
+        
+        # Initialize block_size from kv_cache_config
+        # Note: We use the actual KV cache block size, not hash_block_size
+        self.block_size: int | None = None
+        if self.enable_caching:
+            # Ensure all kv_cache_groups have the same block_size
+            block_sizes = set(
+                g.kv_cache_spec.block_size
+                for g in kv_cache_config.kv_cache_groups
+            )
+            assert len(block_sizes) == 1,                 "Only one block size is supported for now"
+            self.block_size = kv_cache_config.kv_cache_groups[
+                0].kv_cache_spec.block_size
+            # Note: DCP/PCP scaling handled by kv_cache_config if needed
         self.use_eagle = use_eagle
         self.log_stats = log_stats
         self.metrics_collector = metrics_collector