vllm-project · adobrzyn · Nov 13, 2025 · Oct 20, 2025 · Nov 13, 2025 · adobrzyn
@@ -68,6 +68,12 @@ def get_decode_cfgs(self, max_num_seqs, block_size, max_num_batched_tokens, max_
                 f'VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MAX={decode_block_bucket_cfg[2]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MAX={max_blocks}'
             )
             decode_block_bucket_cfg[2] = max_blocks
+            if decode_block_bucket_cfg[0] > max_blocks:
+                decode_block_bucket_min = max(1, max_blocks - decode_block_bucket_cfg[1])
+                logger().info(
+                    f'VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} is higher than max_blocks={max_blocks}. Your configuration VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_cfg[0]} will be overwritten to VLLM_DECODE_BLOCK_BUCKET_MIN={decode_block_bucket_min}'
+                )
+                decode_block_bucket_cfg[0] = decode_block_bucket_min
 
         msg = ("Decode bucket config (min, step, max_warmup) "
                f"bs:{decode_bs_bucket_cfg}, "