commit 8861b56 (1 parent: 8df7a26)
tensorrt_llm/_torch/models/modeling_llama.py
@@ -77,7 +77,7 @@ def __init__(
         else:
             # Disable chunked attention when max_seq_len is smaller than attention_chunk_size
             # TODO: Remove this after all attention kernels in TRTLLM backend support chunked attention
-            if attention_chunk_size and model_config.max_seq_len and model_confg.max_seq_len < attention_chunk_size:
+            if attention_chunk_size and model_config.max_seq_len and model_config.max_seq_len < attention_chunk_size:
                 attention_chunk_size = None
 
         super().__init__(
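The change is a one-character typo fix (`model_confg` -> `model_config`); before it, the guard would raise a NameError whenever both `attention_chunk_size` and `model_config.max_seq_len` were set. A minimal sketch of the corrected guard logic in isolation, using a hypothetical standalone helper (the real check lives inside the model's `__init__` in modeling_llama.py):

from typing import Optional


def resolve_attention_chunk_size(
    attention_chunk_size: Optional[int],
    max_seq_len: Optional[int],
) -> Optional[int]:
    """Return the effective chunk size, or None to disable chunked attention.

    Mirrors the guard in the diff above: chunked attention is disabled
    when max_seq_len is smaller than attention_chunk_size.
    """
    if attention_chunk_size and max_seq_len and max_seq_len < attention_chunk_size:
        return None
    return attention_chunk_size


# Example usage: a 4096-token chunk size is dropped for a 2048-token model,
# but kept when the model's max_seq_len covers at least one full chunk.
assert resolve_attention_chunk_size(4096, 2048) is None
assert resolve_attention_chunk_size(4096, 8192) == 4096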