Skip to content

Commit 3bc18ce

Browse files
committed
feat(config): validate max_num_batched_tokens and max_model_len in AscendSchedulerConfig
Signed-off-by: linfeng-yuan <[email protected]>
1 parent 1b40665 commit 3bc18ce

File tree

3 files changed

+66
-6
lines changed

3 files changed

+66
-6
lines changed

tests/e2e/singlecard/test_ascend_scheduler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def test_concurrent_partial_prefill():
1616
},
1717
},
1818
max_num_seqs=3,
19-
max_num_batched_tokens=200,
19+
max_num_batched_tokens=2048,
2020
enforce_eager=True,
2121
max_model_len=2048,
2222
gpu_memory_utilization=0.7) as vllm_model:
@@ -35,7 +35,7 @@ def test_prefix_cache_stats_is_recorded():
3535
},
3636
},
3737
max_num_seqs=3,
38-
max_num_batched_tokens=200,
38+
max_num_batched_tokens=2048,
3939
enforce_eager=True,
4040
max_model_len=2048,
4141
gpu_memory_utilization=0.7) as vllm_model:

tests/ut/core/test_schedule_config.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class TestAscendSchedulerConfig(TestBase):
2424
def setUp(self):
2525
self.basic_scheduler_config = SchedulerConfig(
2626
max_num_batched_tokens=8192,
27+
max_model_len=8192,
2728
is_multimodal_model=False,
2829
send_delta_data=False,
2930
scheduler_delay_factor=0,
@@ -51,6 +52,7 @@ def test_initialize_from_config_with_override(self):
5152
num_scheduler_steps=1,
5253
scheduler_cls="vllm_ascend.core.scheduler.AscendScheduler",
5354
max_num_batched_tokens=2048,
55+
max_model_len=2048,
5456
),
5557
)
5658
self.assertEqual(ascend_config.enable_chunked_prefill, False)
@@ -65,7 +67,11 @@ def test_not_implemented_policy(self):
6567
with self.assertRaises(NotImplementedError) as context:
6668
AscendSchedulerConfig.initialize_from_config(
6769
self.basic_scheduler_config,
68-
AscendSchedulerConfig(policy="custom_policy", ),
70+
AscendSchedulerConfig(
71+
policy="custom_policy",
72+
max_num_batched_tokens=2048,
73+
max_model_len=2048,
74+
),
6975
)
7076
self.assertIn(
7177
"currently AscendScheduler only supports fcfs policy",
@@ -83,7 +89,11 @@ def test_not_implemented_multi_step(self):
8389
with self.assertRaises(NotImplementedError) as context:
8490
AscendSchedulerConfig.initialize_from_config(
8591
self.basic_scheduler_config,
86-
AscendSchedulerConfig(num_scheduler_steps=2),
92+
AscendSchedulerConfig(
93+
num_scheduler_steps=2,
94+
max_num_batched_tokens=2048,
95+
max_model_len=2048,
96+
),
8797
)
8898
self.assertIn(
8999
"currently AscendScheduler doesn't support multi-step",
@@ -94,7 +104,12 @@ def test_not_implemented_send_delta_data(self):
94104
with self.assertRaises(NotImplementedError) as context:
95105
AscendSchedulerConfig.initialize_from_config(
96106
self.basic_scheduler_config,
97-
AscendSchedulerConfig(send_delta_data=True))
107+
AscendSchedulerConfig(
108+
send_delta_data=True,
109+
max_num_batched_tokens=2048,
110+
max_model_len=2048,
111+
),
112+
)
98113
self.assertIn(
99114
"currently AscendScheduler doesn't support send_delta_data",
100115
str(context.exception),
@@ -104,7 +119,12 @@ def test_not_implemented_delay_factor(self):
104119
with self.assertRaises(NotImplementedError) as context:
105120
AscendSchedulerConfig.initialize_from_config(
106121
self.basic_scheduler_config,
107-
AscendSchedulerConfig(delay_factor=1))
122+
AscendSchedulerConfig(
123+
delay_factor=1,
124+
max_num_batched_tokens=2048,
125+
max_model_len=2048,
126+
),
127+
)
108128
self.assertIn(
109129
"currently AscendScheduler doesn't support scheduler_delay_factor",
110130
str(context.exception),
@@ -115,3 +135,33 @@ def test_no_override(self):
115135
self.basic_scheduler_config, {})
116136
self.assertEqual(ascend_config.max_num_encoder_input_tokens, 8192)
117137
self.assertEqual(ascend_config.encoder_cache_size, 8192)
138+
139+
def test_valid_config_with_chunked_prefill(self):
    """max_num_batched_tokens < max_model_len is accepted when chunked prefill is on."""
    override = AscendSchedulerConfig(
        enable_chunked_prefill=True,
        max_num_batched_tokens=2048,
        max_model_len=4096,
    )
    ascend_config = AscendSchedulerConfig.initialize_from_config(
        self.basic_scheduler_config, override)
    # The override values must survive initialization unchanged.
    self.assertTrue(ascend_config.enable_chunked_prefill)
    self.assertEqual(ascend_config.max_model_len, 4096)
    self.assertEqual(ascend_config.max_num_batched_tokens, 2048)
151+
152+
def test_invalid_config_without_chunked_prefill(self):
    """Rejects max_num_batched_tokens < max_model_len when chunked prefill is off."""
    override = AscendSchedulerConfig(
        enable_chunked_prefill=False,
        max_num_batched_tokens=2048,
        max_model_len=4096,
    )
    with self.assertRaises(ValueError) as context:
        AscendSchedulerConfig.initialize_from_config(
            self.basic_scheduler_config, override)
    message = str(context.exception)
    # The error must name the missing feature and echo both offending values.
    self.assertIn(
        "Ascend scheduler is enabled without chunked prefill feature",
        message)
    self.assertIn("max_num_batched_tokens (2048)", message)
    self.assertIn("max_model_len (4096)", message)

vllm_ascend/core/schedule_config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,16 @@ def __post_init__(self) -> None:
5555
self.max_num_encoder_input_tokens = self.max_num_batched_tokens
5656
self.encoder_cache_size = self.max_num_batched_tokens
5757
self.chunked_prefill_enabled = self.enable_chunked_prefill
58+
if (self.max_num_batched_tokens < self.max_model_len
59+
and not self.chunked_prefill_enabled):
60+
raise ValueError(
61+
"Ascend scheduler is enabled without chunked prefill feature. "
62+
f"Argument max_num_batched_tokens ({self.max_num_batched_tokens}) is "
63+
f"smaller than max_model_len ({self.max_model_len}). "
64+
"This effectively limits the maximum sequence length to "
65+
"max_num_batched_tokens and makes vLLM reject longer "
66+
"sequences. Please increase max_num_batched_tokens or "
67+
"decrease max_model_len.")
5868
if self.policy != "fcfs":
5969
raise NotImplementedError(
6070
f"currently AscendScheduler only supports fcfs policy, got {self.policy}"

0 commit comments

Comments
 (0)