[None][chore] Remove executor config from KV cache connector

leslie-fang25 · leslie-fang25 · commit b27a7b541636 · 2025-08-29T03:42:43.000-07:00
Signed-off-by: leslie-fang25 <leslief@nvidia.com>
diff --git a/examples/llm-api/llm_kv_cache_connector.py b/examples/llm-api/llm_kv_cache_connector.py
@@ -14,7 +14,6 @@
 from tensorrt_llm import LLM, SamplingParams, logger
 from tensorrt_llm._torch.pyexecutor.kv_cache_connector import (
     KvCacheConnectorScheduler, KvCacheConnectorWorker, SchedulerOutput)
-from tensorrt_llm.bindings.executor import ExecutorConfig
 from tensorrt_llm.bindings.internal.batch_manager import LlmRequest
 from tensorrt_llm.llmapi.llm_args import KvCacheConnectorConfig
 
@@ -34,8 +33,8 @@ class PersistentKvCacheConnectorMetadata:
 
 class PersistentKvCacheConnectorWorker(KvCacheConnectorWorker):
 
-    def __init__(self, executor_config: ExecutorConfig):
-        super().__init__(executor_config)
+    def __init__(self):
+        super().__init__()
 
         self.kv_cache_tensor = None
 
@@ -81,10 +80,10 @@ def get_finished(
 
 class PersistentKvCacheConnectorLeader(KvCacheConnectorScheduler):
 
-    def __init__(self, executor_config: ExecutorConfig):
-        super().__init__(executor_config)
+    def __init__(self, tokens_per_block):
+        super().__init__()
 
-        self.block_size = self._config.tokens_per_block
+        self.block_size = tokens_per_block
         self.pending_loads = {}
 
         self.cache_folder = os.environ.get(CONNECTOR_CACHE_FOLDER_KEY,
diff --git a/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py b/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py
@@ -44,7 +44,6 @@
 
 from tensorrt_llm._utils import mpi_allgather, mpi_broadcast, mpi_rank
 from tensorrt_llm.bindings import LlmRequestState
-from tensorrt_llm.bindings.executor import ExecutorConfig
 from tensorrt_llm.bindings.internal.batch_manager import \
     KvCacheConnectorManager as KvCacheConnectorManagerCpp
 from tensorrt_llm.bindings.internal.batch_manager import LlmRequest
@@ -81,8 +80,7 @@ class SchedulerOutput:
 
 class KvCacheConnectorWorker(ABC):
 
-    def __init__(self, config: ExecutorConfig):
-        self._config = config
+    def __init__(self):
         self._metadata = None
         super().__init__()
 
@@ -162,8 +160,7 @@ def get_finished(
 
 class KvCacheConnectorScheduler(ABC):
 
-    def __init__(self, executor_config: ExecutorConfig):
-        self._config = executor_config
+    def __init__(self):
         super().__init__()
 
     @abstractmethod
diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
@@ -409,12 +409,11 @@ def create_py_executor(
             # In this case, the worker may be dependent on the scheduler, or vice-versa.
             # To deal with cases like this, we instantiate them both concurrently.
             with ThreadPoolExecutor(max_workers=2) as executor:
-                connector_worker_task = executor.submit(worker_cls,
-                                                        executor_config)
+                connector_worker_task = executor.submit(worker_cls)
 
                 if scheduler_cls is not None and rank == 0:
                     connector_scheduler_task = executor.submit(
-                        scheduler_cls, executor_config)
+                        scheduler_cls, executor_config.tokens_per_block)
                     connector_scheduler = connector_scheduler_task.result()
                 else:
                     connector_scheduler = None