Skip to content

Commit af4e4fb

Browse files
committed
Automatically adjust VLLM_DECODE_BLOCK_BUCKET_MIN if it exceeds max_blocks
Signed-off-by: Daniel Socek <[email protected]>
1 parent 3b629a8 commit af4e4fb

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

vllm_gaudi/extension/bucketing/linear.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,12 @@ def get_decode_cfgs(self, max_num_seqs, block_size, max_num_batched_tokens, max_
64 64
f'VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MAX={max_blocks}'
65 65
)
66 66
decode_block_bucket_cfg[2] = max_blocks
67+
if decode_block_bucket_cfg[0] > max_blocks:
68+
decode_block_bucket_min = max(1, max_blocks - decode_block_bucket_cfg[1])
69+
logger().info(
70+
f'VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_min}'
71+
)
72+
decode_block_bucket_cfg[0] = decode_block_bucket_min
67 73

68 74
msg = ("Decode bucket config (min, step, max_warmup) "
69 75
f"bs:{decode_bs_bucket_cfg}, "

0 commit comments

Comments
 (0)