Skip to content

Commit cd1b809

Browse files
authored
[https://nvbugs/5374016][fix] improve error message (#6893)
Signed-off-by: junq <[email protected]>
1 parent fef2f1f commit cd1b809

File tree

1 file changed

+7
-1
lines changed
  • tensorrt_llm/_torch/attention_backend

1 file changed

+7
-1
lines changed

tensorrt_llm/_torch/attention_backend/trtllm.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -763,8 +763,14 @@ def prepare(self) -> None:
763763
self.kv_cache_block_offsets[:, :self.num_seqs].copy_(
764764
self.host_kv_cache_block_offsets[:, :self.num_seqs],
765765
non_blocking=True)
766+
767+
error_message = (
768+
f"The max KV cache length of input sequences ({self.kv_lens[:self.num_seqs].max()}) "
769+
f"exceeds the KV cache manager's maximum supported length "
770+
f"({self.kv_cache_manager.max_seq_len}).")
771+
766772
assert self.kv_lens[:self.num_seqs].max(
767-
) <= self.kv_cache_manager.max_seq_len, f"Please set max_seq_len to at least {self.kv_lens[:self.num_seqs].max()} for kv cache manager."
773+
) <= self.kv_cache_manager.max_seq_len, error_message
768774

769775
self.kv_lens_cuda_runtime = self.kv_lens_cuda[:self.num_seqs]
770776
self.kv_lens_runtime = self.kv_lens[:self.num_seqs]

0 commit comments

Comments (0)