Skip to content

Commit a40ba66

Browse files
committed
Automatically adjust VLLM_DECODE_BLOCK_BUCKET_MIN if it exceeds max_blocks
Signed-off-by: Daniel Socek <[email protected]>
1 parent dbf9e48 commit a40ba66

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

vllm_gaudi/extension/bucketing/linear.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,12 @@ def get_decode_cfgs(self, max_num_seqs, block_size, max_num_batched_tokens, max_
6868
f'VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MAX={max_blocks}'
6969
)
7070
decode_block_bucket_cfg[2] = max_blocks
71+
if decode_block_bucket_cfg[0] > max_blocks:
72+
decode_block_bucket_min = max(1, max_blocks - decode_block_bucket_cfg[1])
73+
logger().info(
74+
f'VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_min}'
75+
)
76+
decode_block_bucket_cfg[0] = decode_block_bucket_min
7177

7278
msg = ("Decode bucket config (min, step, max_warmup) "
7379
f"bs:{decode_bs_bucket_cfg}, "

0 commit comments

Comments
 (0)