Skip to content

Commit 99bdd52

Browse files
committed
Automatically adjust VLLM_DECODE_BLOCK_BUCKET_MIN if it exceeds max_blocks
Signed-off-by: Daniel Socek <[email protected]>
1 parent 3b629a8 commit 99bdd52

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

vllm_gaudi/extension/bucketing/linear.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,11 @@ def get_decode_cfgs(self, max_num_seqs, block_size, max_num_batched_tokens, max_
64 64
f'VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MAX={max_blocks}'
65 65
)
66 66
decode_block_bucket_cfg[2] = max_blocks
67+
if decode_block_bucket_cfg[0] > max_blocks:
68+
logger().info(
69+
f'VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MIN={max_blocks - decode_block_bucket_cfg[1]}'
70+
)
71+
decode_block_bucket_cfg[0] = max_blocks - decode_block_bucket_cfg[1]
67 72

68 73
msg = ("Decode bucket config (min, step, max_warmup) "
69 74
f"bs:{decode_bs_bucket_cfg}, "

0 commit comments

Comments
 (0)