3 files changed: +9 −14 lines changed
tensorrt_llm/_torch/pyexecutor — 3 files changed: +9 −14 lines changed
Original file line number | Diff line number | Diff line change
14
14
from tensorrt_llm import LLM , SamplingParams , logger
15
15
from tensorrt_llm ._torch .pyexecutor .kv_cache_connector import (
16
16
KvCacheConnectorScheduler , KvCacheConnectorWorker , SchedulerOutput )
17
- from tensorrt_llm .bindings .executor import ExecutorConfig
18
17
from tensorrt_llm .bindings .internal .batch_manager import LlmRequest
19
18
from tensorrt_llm .llmapi .llm_args import KvCacheConnectorConfig
20
19
@@ -34,8 +33,8 @@ class PersistentKvCacheConnectorMetadata:
34
33
35
34
class PersistentKvCacheConnectorWorker (KvCacheConnectorWorker ):
36
35
37
- def __init__ (self , executor_config : ExecutorConfig ):
38
- super ().__init__ (executor_config )
36
+ def __init__ (self ):
37
+ super ().__init__ ()
39
38
40
39
self .kv_cache_tensor = None
41
40
@@ -81,10 +80,10 @@ def get_finished(
81
80
82
81
class PersistentKvCacheConnectorLeader (KvCacheConnectorScheduler ):
83
82
84
- def __init__ (self , executor_config : ExecutorConfig ):
85
- super ().__init__ (executor_config )
83
+ def __init__ (self , tokens_per_block ):
84
+ super ().__init__ ()
86
85
87
- self .block_size = self . _config . tokens_per_block
86
+ self .block_size = tokens_per_block
88
87
self .pending_loads = {}
89
88
90
89
self .cache_folder = os .environ .get (CONNECTOR_CACHE_FOLDER_KEY ,
Original file line number Diff line number Diff line change 44
44
45
45
from tensorrt_llm ._utils import mpi_allgather , mpi_broadcast , mpi_rank
46
46
from tensorrt_llm .bindings import LlmRequestState
47
- from tensorrt_llm .bindings .executor import ExecutorConfig
48
47
from tensorrt_llm .bindings .internal .batch_manager import \
49
48
KvCacheConnectorManager as KvCacheConnectorManagerCpp
50
49
from tensorrt_llm .bindings .internal .batch_manager import LlmRequest
@@ -81,8 +80,7 @@ class SchedulerOutput:
81
80
82
81
class KvCacheConnectorWorker (ABC ):
83
82
84
- def __init__ (self , config : ExecutorConfig ):
85
- self ._config = config
83
+ def __init__ (self ):
86
84
self ._metadata = None
87
85
super ().__init__ ()
88
86
@@ -162,8 +160,7 @@ def get_finished(
162
160
163
161
class KvCacheConnectorScheduler (ABC ):
164
162
165
- def __init__ (self , executor_config : ExecutorConfig ):
166
- self ._config = executor_config
163
+ def __init__ (self ):
167
164
super ().__init__ ()
168
165
169
166
@abstractmethod
Original file line number Diff line number Diff line change @@ -409,12 +409,11 @@ def create_py_executor(
409
409
# In this case, the worker may be dependent on the scheduler, or vice-versa.
410
410
# To deal with cases like this, we instantiate them both concurrently.
411
411
with ThreadPoolExecutor (max_workers = 2 ) as executor :
412
- connector_worker_task = executor .submit (worker_cls ,
413
- executor_config )
412
+ connector_worker_task = executor .submit (worker_cls )
414
413
415
414
if scheduler_cls is not None and rank == 0 :
416
415
connector_scheduler_task = executor .submit (
417
- scheduler_cls , executor_config )
416
+ scheduler_cls , executor_config . tokens_per_block )
418
417
connector_scheduler = connector_scheduler_task .result ()
419
418
else :
420
419
connector_scheduler = None
You can’t perform that action at this time.
0 commit comments