@@ -107,42 +107,40 @@ def prepare_draft_tokens(self,
107
107
assert drafter .should_use_spec_decode (active_requests ,
108
108
max_batch_size = 8 ,
109
109
max_num_tokens = 4096 * 8 ,
110
- max_draft_len = 4 ) is True
110
+ max_draft_len = 4 )
111
111
112
112
# Small batch size ON case: num_effective_requests = min(12, 5, very_large) = 5 <= 6 → True
113
113
active_requests = [object ()] * 12
114
114
assert drafter .should_use_spec_decode (active_requests ,
115
115
max_batch_size = 5 ,
116
116
max_num_tokens = 4096 * 8 ,
117
- max_draft_len = 4 ) is True
117
+ max_draft_len = 4 )
118
118
119
119
# Small token budget ON case: token_cap = 28 // (1+4) = 5 → min(8, 12, 5) = 5 <= 6 → True
120
120
active_requests = [object ()] * 12
121
121
assert drafter .should_use_spec_decode (active_requests ,
122
122
max_batch_size = 8 ,
123
123
max_num_tokens = 28 ,
124
- max_draft_len = 4 ) is True
124
+ max_draft_len = 4 )
125
125
126
126
# Generic OFF case: num_effective_requests = min(12, 8, very_large) = 8 > 6 → False
127
127
active_requests = [object ()] * 12
128
- assert drafter .should_use_spec_decode (active_requests ,
129
- max_batch_size = 8 ,
130
- max_num_tokens = 4096 * 8 ,
131
- max_draft_len = 4 ) is False
128
+ assert not drafter .should_use_spec_decode (active_requests ,
129
+ max_batch_size = 8 ,
130
+ max_num_tokens = 4096 * 8 ,
131
+ max_draft_len = 4 )
132
132
133
133
# Edge case - No active requests (empty list) OFF case
134
134
active_requests = []
135
- assert drafter .should_use_spec_decode (active_requests ,
136
- max_batch_size = 8 ,
137
- max_num_tokens = 4096 * 8 ,
138
- max_draft_len = 4 ) is False
135
+ assert not drafter .should_use_spec_decode (active_requests ,
136
+ max_batch_size = 8 ,
137
+ max_num_tokens = 4096 * 8 ,
138
+ max_draft_len = 4 )
139
139
140
140
# Edge case - Token cap equals 0 OFF case: token_cap = 4 // (1+4) = 0 → min(12, 8, 0) = 0; expected False even though 0 <= 6, because the token budget cannot fit a single request
141
141
active_requests = [object ()] * 12
142
- assert drafter .should_use_spec_decode (active_requests ,
143
- max_batch_size = 8 ,
144
- max_num_tokens = 4 ,
145
- max_draft_len = 4 ) is False
142
+ assert not drafter .should_use_spec_decode (
143
+ active_requests , max_batch_size = 8 , max_num_tokens = 4 , max_draft_len = 4 )
146
144
147
145
148
146
if __name__ == "__main__" :
0 commit comments