Skip to content

Commit 31f5ed3

Browse files
feat: add finish reason = tool_calls for stream=False and phi-4 detect token start fix (#3087)
Signed-off-by: Elyas Mehtabuddin <[email protected]>
1 parent ef6734d commit 31f5ed3

File tree

6 files changed

+804
-41
lines changed

6 files changed

+804
-41
lines changed

lib/bindings/python/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/llm/src/protocols/openai/chat_completions/aggregator.rs

Lines changed: 273 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,13 @@ impl DeltaAggregator {
181181
.collect();
182182

183183
// Initialize and push the converted tool calls to state_choice.tool_calls
184-
if let Some(existing_tool_calls) = &mut state_choice.tool_calls {
185-
existing_tool_calls.extend(converted_tool_calls);
186-
} else {
187-
state_choice.tool_calls = Some(converted_tool_calls);
184+
// Only set tool_calls to Some if there are actual tool calls
185+
if !converted_tool_calls.is_empty() {
186+
if let Some(existing_tool_calls) = &mut state_choice.tool_calls {
187+
existing_tool_calls.extend(converted_tool_calls);
188+
} else {
189+
state_choice.tool_calls = Some(converted_tool_calls);
190+
}
188191
}
189192
}
190193

@@ -257,6 +260,17 @@ impl From<DeltaChoice> for dynamo_async_openai::types::ChatChoice {
257260
/// # Note
258261
/// The `function_call` field is deprecated.
259262
fn from(delta: DeltaChoice) -> Self {
263+
// If tool calls are present and non-empty, finish reason should be ToolCalls
264+
let finish_reason = if delta
265+
.tool_calls
266+
.as_ref()
267+
.is_some_and(|calls| !calls.is_empty())
268+
{
269+
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
270+
} else {
271+
delta.finish_reason
272+
};
273+
260274
dynamo_async_openai::types::ChatChoice {
261275
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
262276
role: delta.role.expect("delta should have a Role"),
@@ -272,7 +286,7 @@ impl From<DeltaChoice> for dynamo_async_openai::types::ChatChoice {
272286
reasoning_content: delta.reasoning_content,
273287
},
274288
index: delta.index,
275-
finish_reason: delta.finish_reason,
289+
finish_reason,
276290
logprobs: delta.logprobs,
277291
}
278292
}
@@ -347,7 +361,7 @@ mod tests {
347361
tool_calls.map(|tool_calls| serde_json::from_str(tool_calls).unwrap());
348362

349363
let tool_call_chunks = if let Some(tool_calls) = tool_calls {
350-
vec![
364+
Some(vec![
351365
dynamo_async_openai::types::ChatCompletionMessageToolCallChunk {
352366
index: 0,
353367
id: Some("test_id".to_string()),
@@ -357,22 +371,15 @@ mod tests {
357371
arguments: Some(serde_json::to_string(&tool_calls["arguments"]).unwrap()),
358372
}),
359373
},
360-
]
374+
])
361375
} else {
362-
vec![
363-
dynamo_async_openai::types::ChatCompletionMessageToolCallChunk {
364-
index: 0,
365-
id: None,
366-
r#type: None,
367-
function: None,
368-
},
369-
]
376+
None
370377
};
371378

372379
let delta = dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
373380
content: Some(text.to_string()),
374381
function_call: None,
375-
tool_calls: Some(tool_call_chunks),
382+
tool_calls: tool_call_chunks,
376383
role,
377384
refusal: None,
378385
reasoning_content: None,
@@ -625,6 +632,215 @@ mod tests {
625632
);
626633
}
627634

635+
#[tokio::test]
636+
async fn test_tool_calling_finish_reason_override_from_stop() {
637+
// Test that when tool calls are present but finish reason is Stop, it gets overridden to ToolCalls
638+
let tool_call_json =
639+
r#"{"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}"#;
640+
641+
let annotated_delta = create_test_delta(
642+
0,
643+
"I'll check the weather for you.",
644+
Some(dynamo_async_openai::types::Role::Assistant),
645+
Some(dynamo_async_openai::types::FinishReason::Stop), // Original finish reason is Stop
646+
None,
647+
Some(tool_call_json),
648+
);
649+
650+
let data = annotated_delta.data.unwrap();
651+
let annotated_delta = Annotated {
652+
data: Some(data),
653+
id: Some("test_id".to_string()),
654+
event: None,
655+
comment: None,
656+
};
657+
let stream = Box::pin(stream::iter(vec![annotated_delta]));
658+
659+
let result = DeltaAggregator::apply(stream, ParsingOptions::default()).await;
660+
661+
assert!(result.is_ok());
662+
let response = result.unwrap();
663+
assert_eq!(response.choices.len(), 1);
664+
let choice = &response.choices[0];
665+
666+
// Verify tool calls are present
667+
assert!(choice.message.tool_calls.is_some());
668+
let tool_calls = choice.message.tool_calls.as_ref().unwrap();
669+
assert_eq!(tool_calls.len(), 1);
670+
671+
// Most importantly, verify that finish reason was overridden to ToolCalls despite original being Stop
672+
assert_eq!(
673+
choice.finish_reason,
674+
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
675+
);
676+
}
677+
678+
#[tokio::test]
679+
async fn test_tool_calling_finish_reason_override_from_length() {
680+
// Test that when tool calls are present but finish reason is Length, it gets overridden to ToolCalls
681+
let tool_call_json = r#"{"name": "search", "arguments": {"query": "rust programming"}}"#;
682+
683+
let annotated_delta = create_test_delta(
684+
0,
685+
"Let me search for that.",
686+
Some(dynamo_async_openai::types::Role::Assistant),
687+
Some(dynamo_async_openai::types::FinishReason::Length), // Original finish reason is Length
688+
None,
689+
Some(tool_call_json),
690+
);
691+
692+
let data = annotated_delta.data.unwrap();
693+
let annotated_delta = Annotated {
694+
data: Some(data),
695+
id: Some("test_id".to_string()),
696+
event: None,
697+
comment: None,
698+
};
699+
let stream = Box::pin(stream::iter(vec![annotated_delta]));
700+
701+
let result = DeltaAggregator::apply(stream, ParsingOptions::default()).await;
702+
703+
assert!(result.is_ok());
704+
let response = result.unwrap();
705+
assert_eq!(response.choices.len(), 1);
706+
let choice = &response.choices[0];
707+
708+
// Verify tool calls are present
709+
assert!(choice.message.tool_calls.is_some());
710+
let tool_calls = choice.message.tool_calls.as_ref().unwrap();
711+
assert_eq!(tool_calls.len(), 1);
712+
713+
// Verify that finish reason was overridden to ToolCalls despite original being Length
714+
assert_eq!(
715+
choice.finish_reason,
716+
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
717+
);
718+
}
719+
720+
#[tokio::test]
721+
async fn test_tool_calling_finish_reason_override_from_none() {
722+
// Test that when tool calls are present but finish reason is None, it gets set to ToolCalls
723+
let tool_call_json = r#"{"name": "calculate", "arguments": {"expression": "2+2"}}"#;
724+
725+
let annotated_delta = create_test_delta(
726+
0,
727+
"I'll calculate that for you.",
728+
Some(dynamo_async_openai::types::Role::Assistant),
729+
None, // Original finish reason is None
730+
None,
731+
Some(tool_call_json),
732+
);
733+
734+
let data = annotated_delta.data.unwrap();
735+
let annotated_delta = Annotated {
736+
data: Some(data),
737+
id: Some("test_id".to_string()),
738+
event: None,
739+
comment: None,
740+
};
741+
let stream = Box::pin(stream::iter(vec![annotated_delta]));
742+
743+
let result = DeltaAggregator::apply(stream, ParsingOptions::default()).await;
744+
745+
assert!(result.is_ok());
746+
let response = result.unwrap();
747+
assert_eq!(response.choices.len(), 1);
748+
let choice = &response.choices[0];
749+
750+
// Verify tool calls are present
751+
assert!(choice.message.tool_calls.is_some());
752+
let tool_calls = choice.message.tool_calls.as_ref().unwrap();
753+
assert_eq!(tool_calls.len(), 1);
754+
755+
// Verify that finish reason was set to ToolCalls despite original being None
756+
assert_eq!(
757+
choice.finish_reason,
758+
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
759+
);
760+
}
761+
762+
#[tokio::test]
763+
async fn test_no_tool_calling_preserves_original_finish_reason() {
764+
// Test that when no tool calls are present, the original finish reason is preserved
765+
let annotated_delta = create_test_delta(
766+
0,
767+
"This is a regular response without tool calls.",
768+
Some(dynamo_async_openai::types::Role::Assistant),
769+
Some(dynamo_async_openai::types::FinishReason::Stop),
770+
None,
771+
None, // No tool calls
772+
);
773+
774+
let data = annotated_delta.data.unwrap();
775+
let annotated_delta = Annotated {
776+
data: Some(data),
777+
id: Some("test_id".to_string()),
778+
event: None,
779+
comment: None,
780+
};
781+
let stream = Box::pin(stream::iter(vec![annotated_delta]));
782+
783+
let result = DeltaAggregator::apply(stream, ParsingOptions::default()).await;
784+
785+
assert!(result.is_ok());
786+
let response = result.unwrap();
787+
assert_eq!(response.choices.len(), 1);
788+
let choice = &response.choices[0];
789+
790+
// Verify no tool calls are present
791+
assert!(choice.message.tool_calls.is_none());
792+
793+
// Verify that original finish reason (Stop) is preserved
794+
assert_eq!(
795+
choice.finish_reason,
796+
Some(dynamo_async_openai::types::FinishReason::Stop)
797+
);
798+
}
799+
800+
#[tokio::test]
801+
async fn test_empty_tool_calls_preserves_original_finish_reason() {
802+
// Test that when tool calls array is empty, the original finish reason is preserved
803+
// Create a delta with empty tool calls by modifying the create_test_delta output
804+
let mut annotated_delta = create_test_delta(
805+
0,
806+
"Response with empty tool calls array.",
807+
Some(dynamo_async_openai::types::Role::Assistant),
808+
Some(dynamo_async_openai::types::FinishReason::Length),
809+
None,
810+
None,
811+
);
812+
813+
// Manually set empty tool calls array
814+
if let Some(ref mut data) = annotated_delta.data {
815+
data.choices[0].delta.tool_calls = Some(vec![]); // Empty tool calls array
816+
}
817+
818+
let data = annotated_delta.data.unwrap();
819+
let annotated_delta = Annotated {
820+
data: Some(data),
821+
id: Some("test_id".to_string()),
822+
event: None,
823+
comment: None,
824+
};
825+
let stream = Box::pin(stream::iter(vec![annotated_delta]));
826+
827+
let result = DeltaAggregator::apply(stream, ParsingOptions::default()).await;
828+
829+
assert!(result.is_ok());
830+
let response = result.unwrap();
831+
assert_eq!(response.choices.len(), 1);
832+
let choice = &response.choices[0];
833+
834+
// Verify tool calls array is empty
835+
assert!(choice.message.tool_calls.is_none());
836+
837+
// Verify that original finish reason (Length) is preserved since tool calls are empty
838+
assert_eq!(
839+
choice.finish_reason,
840+
Some(dynamo_async_openai::types::FinishReason::Length)
841+
);
842+
}
843+
628844
#[tokio::test]
629845
async fn test_tool_calling_output() {
630846
// Simulate a delta with a tool call in the content
@@ -688,4 +904,45 @@ mod tests {
688904
dynamo_async_openai::types::Role::Assistant
689905
);
690906
}
907+
908+
#[tokio::test]
909+
async fn test_tool_calling_finish_reason_override_from_stop_alternative() {
910+
// Test that when tool calls are present but finish reason is Stop, it gets overridden to ToolCalls
911+
let tool_call_json =
912+
r#"{"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}"#;
913+
914+
let annotated_delta = create_test_delta(
915+
0,
916+
"Getting weather for New York",
917+
Some(dynamo_async_openai::types::Role::Assistant),
918+
Some(dynamo_async_openai::types::FinishReason::Stop), // This should be overridden
919+
None,
920+
Some(tool_call_json),
921+
);
922+
923+
let stream = Box::pin(stream::iter(vec![annotated_delta]));
924+
925+
// Call DeltaAggregator::apply
926+
let result = DeltaAggregator::apply(stream, ParsingOptions::default()).await;
927+
928+
// Check the result
929+
assert!(result.is_ok());
930+
let response = result.unwrap();
931+
932+
// There should be one choice
933+
assert_eq!(response.choices.len(), 1);
934+
let choice = &response.choices[0];
935+
936+
// The finish_reason should be ToolCalls, not Stop, because tool calls are present
937+
assert_eq!(
938+
choice.finish_reason,
939+
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
940+
);
941+
942+
// Verify tool calls are present
943+
assert!(choice.message.tool_calls.is_some());
944+
let tool_calls = choice.message.tool_calls.as_ref().unwrap();
945+
assert_eq!(tool_calls.len(), 1);
946+
assert_eq!(tool_calls[0].function.name, "get_weather");
947+
}
691948
}

0 commit comments

Comments
 (0)