@@ -87,7 +87,7 @@ func processHeaderForLatencyPrediction(
8787 KVCachePercentage : m .KVCacheUsagePercent ,
8888 InputTokenLength : len (strings .Fields (sloCtx .schedulingRequest .Body .Completions .Prompt )),
8989 NumRequestWaiting : m .WaitingQueueSize ,
90- NumRequestRunning : m .RunningQueueSize ,
90+ NumRequestRunning : m .RunningRequestsSize ,
9191 NumTokensGenerated : 0 ,
9292 PrefixCacheScore : prefix_cache_score ,
9393 }
@@ -174,7 +174,7 @@ func recordTTFTTrainingData(
174174 ActualTPOT : 0 ,
175175 Timestamp : now ,
176176 NumRequestWaiting : m .WaitingQueueSize ,
177- NumRequestRunning : m .RunningQueueSize ,
177+ NumRequestRunning : m .RunningRequestsSize ,
178178 NumTokensGenerated : 0 ,
179179 PrefixCacheScore : prefixCacheScore ,
180180 }
@@ -201,7 +201,7 @@ func predictFirstTPOT(
201201 KVCachePercentage : m .KVCacheUsagePercent ,
202202 InputTokenLength : len (strings .Fields (sloCtx .schedulingRequest .Body .Completions .Prompt )),
203203 NumRequestWaiting : m .WaitingQueueSize ,
204- NumRequestRunning : m .RunningQueueSize ,
204+ NumRequestRunning : m .RunningRequestsSize ,
205205 NumTokensGenerated : sloCtx .generatedTokenCount ,
206206 PrefixCacheScore : 0 ,
207207 }
@@ -260,7 +260,7 @@ func processTokenForLatencyPrediction(
260260 ActualTPOT : latencyMs ,
261261 Timestamp : now ,
262262 NumRequestWaiting : m .WaitingQueueSize ,
263- NumRequestRunning : m .RunningQueueSize ,
263+ NumRequestRunning : m .RunningRequestsSize ,
264264 NumTokensGenerated : sloCtx .generatedTokenCount - 1 ,
265265 PrefixCacheScore : 0 , // TPOT does not use prefix cache score
266266 }
@@ -274,7 +274,7 @@ func processTokenForLatencyPrediction(
274274 KVCachePercentage : m .KVCacheUsagePercent ,
275275 InputTokenLength : len (strings .Fields (sloCtx .schedulingRequest .Body .Completions .Prompt )),
276276 NumRequestWaiting : m .WaitingQueueSize ,
277- NumRequestRunning : m .RunningQueueSize ,
277+ NumRequestRunning : m .RunningRequestsSize ,
278278 NumTokensGenerated : sloCtx .generatedTokenCount ,
279279 PrefixCacheScore : 0 , // TPOT does not use prefix cache score
280280 }
@@ -337,7 +337,7 @@ func bulkPredictWithMetrics(
337337 KVCachePercentage : metricsStates [i ].KVCacheUsagePercent ,
338338 InputTokenLength : len (strings .Fields (prompts [i ])),
339339 NumRequestWaiting : metricsStates [i ].WaitingQueueSize ,
340- NumRequestRunning : metricsStates [i ].RunningQueueSize ,
340+ NumRequestRunning : metricsStates [i ].RunningRequestsSize ,
341341 NumTokensGenerated : generatedTokenCounts [i ],
342342 PrefixCacheScore : prefixCacheScores [i ],
343343 }
@@ -385,7 +385,7 @@ func bulkPredictWithMetrics(
385385 "generated_tokens" , bulkRequests [i ].NumTokensGenerated ,
386386 "kv_cache_percent" , bulkRequests [i ].KVCachePercentage ,
387387 "waiting_queue" , bulkRequests [i ].NumRequestWaiting ,
388- "running_queue " , bulkRequests [i ].NumRequestRunning ,
388+ "running_requests " , bulkRequests [i ].NumRequestRunning ,
389389 "prefix_cache_score" , bulkRequests [i ].PrefixCacheScore )
390390 }
391391 }
0 commit comments