NVIDIA · leslie-fang25 · Sep 3, 2025 · Aug 29, 2025
@@ -14,7 +14,6 @@
 from tensorrt_llm import LLM, SamplingParams, logger
 from tensorrt_llm._torch.pyexecutor.kv_cache_connector import (
     KvCacheConnectorScheduler, KvCacheConnectorWorker, SchedulerOutput)
-from tensorrt_llm.bindings.executor import ExecutorConfig
 from tensorrt_llm.bindings.internal.batch_manager import LlmRequest
 from tensorrt_llm.llmapi.llm_args import KvCacheConnectorConfig
 
@@ -34,8 +33,8 @@ class PersistentKvCacheConnectorMetadata:
 
 class PersistentKvCacheConnectorWorker(KvCacheConnectorWorker):
 
-    def __init__(self, executor_config: ExecutorConfig):
-        super().__init__(executor_config)
+    def __init__(self):
+        super().__init__()
 
         self.kv_cache_tensor = None
 
@@ -81,10 +80,10 @@ def get_finished(
 
 class PersistentKvCacheConnectorLeader(KvCacheConnectorScheduler):
 
-    def __init__(self, executor_config: ExecutorConfig):
-        super().__init__(executor_config)
+    def __init__(self, tokens_per_block):
+        super().__init__()
 
-        self.block_size = self._config.tokens_per_block
+        self.block_size = tokens_per_block
         self.pending_loads = {}
 
         self.cache_folder = os.environ.get(CONNECTOR_CACHE_FOLDER_KEY,

@@ -44,7 +44,6 @@
 
 from tensorrt_llm._utils import mpi_allgather, mpi_broadcast, mpi_rank
 from tensorrt_llm.bindings import LlmRequestState
-from tensorrt_llm.bindings.executor import ExecutorConfig
 from tensorrt_llm.bindings.internal.batch_manager import \
     KvCacheConnectorManager as KvCacheConnectorManagerCpp
 from tensorrt_llm.bindings.internal.batch_manager import LlmRequest
@@ -81,8 +80,7 @@ class SchedulerOutput:
 
 class KvCacheConnectorWorker(ABC):
 
-    def __init__(self, config: ExecutorConfig):
-        self._config = config
+    def __init__(self):
         self._metadata = None
         super().__init__()
 
@@ -162,8 +160,7 @@ def get_finished(
 
 class KvCacheConnectorScheduler(ABC):
 
-    def __init__(self, executor_config: ExecutorConfig):
-        self._config = executor_config
+    def __init__(self):
         super().__init__()
 
     @abstractmethod

@@ -409,12 +409,11 @@ def create_py_executor(
             # In this case, the worker may be dependent on the scheduler, or vice-versa.
             # To deal with cases like this, we instantiate them both concurrently.
             with ThreadPoolExecutor(max_workers=2) as executor:
-                connector_worker_task = executor.submit(worker_cls,
-                                                        executor_config)
+                connector_worker_task = executor.submit(worker_cls)
 
                 if scheduler_cls is not None and rank == 0:
                     connector_scheduler_task = executor.submit(
-                        scheduler_cls, executor_config)
+                        scheduler_cls, executor_config.tokens_per_block)
                     connector_scheduler = connector_scheduler_task.result()
                 else:
                     connector_scheduler = None