1 file changed: +7 −1

tensorrt_llm/_torch/attention_backend

```diff
@@ -763,8 +763,14 @@ def prepare(self) -> None:
         self.kv_cache_block_offsets[:, :self.num_seqs].copy_(
             self.host_kv_cache_block_offsets[:, :self.num_seqs],
             non_blocking=True)
+
+        error_message = (
+            f"The max KV cache length of input sequences ({self.kv_lens[:self.num_seqs].max()}) "
+            f"exceeds the KV cache manager's maximum supported length "
+            f"({self.kv_cache_manager.max_seq_len}).")
+
         assert self.kv_lens[:self.num_seqs].max(
-        ) <= self.kv_cache_manager.max_seq_len, f"Please set max_seq_len to at least {self.kv_lens[:self.num_seqs].max()} for kv cache manager."
+        ) <= self.kv_cache_manager.max_seq_len, error_message

         self.kv_lens_cuda_runtime = self.kv_lens_cuda[:self.num_seqs]
         self.kv_lens_runtime = self.kv_lens[:self.num_seqs]
```
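The change replaces the old inline assertion message with a message built up front, so the assert line stays readable and the failure text names both the offending length and the manager's limit. Below is a minimal, self-contained sketch of that behavior; `FakeKVCacheManager` and `check_kv_lens` are hypothetical stand-ins for the real attention-metadata and KV cache manager classes, which live elsewhere in tensorrt_llm/_torch.

```python
import torch

class FakeKVCacheManager:
    """Hypothetical stand-in exposing only the max_seq_len attribute used here."""

    def __init__(self, max_seq_len: int):
        self.max_seq_len = max_seq_len

def check_kv_lens(kv_lens: torch.Tensor, num_seqs: int,
                  kv_cache_manager: FakeKVCacheManager) -> None:
    # Build the message before the assert, mirroring the diff. Note the
    # f-string (including its .max() call) is evaluated even when the
    # assertion passes, unlike the old inline message.
    error_message = (
        f"The max KV cache length of input sequences ({kv_lens[:num_seqs].max()}) "
        f"exceeds the KV cache manager's maximum supported length "
        f"({kv_cache_manager.max_seq_len}).")
    assert kv_lens[:num_seqs].max() <= kv_cache_manager.max_seq_len, error_message

# Usage: the longest sequence (256) exceeds max_seq_len (128), so the
# assertion fails with the descriptive message above.
kv_lens = torch.tensor([64, 256, 32])
try:
    check_kv_lens(kv_lens, num_seqs=3,
                  kv_cache_manager=FakeKVCacheManager(max_seq_len=128))
except AssertionError as e:
    print(e)
```

The cost of this style is one extra tensor read per `prepare()` call to format the message; the benefit is an error that reports both numbers instead of only the required `max_seq_len`.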