@@ -73,27 +73,40 @@ fn handle_single_token_tool_calls(input: &str, start_token: &str) -> Option<Stri
73
73
if s. is_empty ( ) {
74
74
continue ;
75
75
}
76
- // Only consider segments that start like JSON
76
+ // Only consider segments that start like JSON (objects or arrays)
77
77
if s. starts_with ( '{' ) {
78
- // Trim trailing non-JSON by cutting at the last closing brace/bracket
78
+ // Trim trailing non-JSON by cutting at the last closing brace
79
79
if let Some ( pos) = s. rfind ( '}' ) {
80
80
let candidate = & s[ ..=pos] . trim ( ) ;
81
81
// Keep only valid JSON candidates
82
82
if serde_json:: from_str :: < serde_json:: Value > ( candidate) . is_ok ( ) {
83
83
items. push ( candidate. to_string ( ) ) ;
84
84
}
85
85
}
86
+ } else if s. starts_with ( '[' ) {
87
+ // Handle array format (like phi4: functools[{...}])
88
+ if let Some ( pos) = s. rfind ( ']' ) {
89
+ let candidate = & s[ ..=pos] . trim ( ) ;
90
+ // Keep only valid JSON arrays
91
+ if serde_json:: from_str :: < serde_json:: Value > ( candidate) . is_ok ( ) {
92
+ // For arrays, push each element of the array as its own serialized item
93
+ if let Ok ( serde_json:: Value :: Array ( arr) ) =
94
+ serde_json:: from_str :: < serde_json:: Value > ( candidate)
95
+ {
96
+ for item in arr {
97
+ if let Ok ( item_str) = serde_json:: to_string ( & item) {
98
+ items. push ( item_str) ;
99
+ }
100
+ }
101
+ }
102
+ }
103
+ }
86
104
}
87
105
}
88
106
if items. is_empty ( ) {
89
- // Remove everything up to and including the first occurrence of the start token
90
- if let Some ( idx) = input. find ( start_token) {
91
- let rest = & input[ idx + start_token. len ( ) ..] ;
92
- return Some ( rest. trim_start ( ) . to_string ( ) ) ;
93
- } else {
94
- // Shouldn't happen because we checked contains() above, but be defensive
95
- return None ;
96
- }
107
+ // If we found the start token but no valid JSON after it, return empty string
108
+ // to avoid leaking the invalid content (important for phi4 and similar models)
109
+ return Some ( String :: new ( ) ) ;
97
110
}
98
111
Some ( format ! ( "[{}]" , items. join( "," ) ) )
99
112
}
@@ -174,6 +187,7 @@ pub fn try_tool_call_parse_basic_json(
174
187
// Assumption: a single message will not mix different tool-call tags. We iterate over the tags so that the default tags of multiple models are supported.
175
188
let mut json = trimmed. to_string ( ) ;
176
189
let mut normal_text = trimmed. to_string ( ) ;
190
+ let mut found_start_token_with_no_valid_json = false ;
177
191
178
192
// First, check if ANY start token exists in the input
179
193
let has_start_token = tool_call_start_tokens
@@ -204,18 +218,32 @@ pub fn try_tool_call_parse_basic_json(
204
218
// Single token case
205
219
let result = handle_single_token_tool_calls ( & json, start_token) ;
206
220
if let Some ( content) = result {
221
+ // Check if we found a start token but got empty JSON back
222
+ // This indicates the token was found but no valid JSON followed
223
+ if content. is_empty ( ) {
224
+ found_start_token_with_no_valid_json = true ;
225
+ }
226
+
207
227
json = content;
208
228
// For single token case, use the normal text we extracted earlier
209
229
normal_text = new_normal_text;
230
+
210
231
break ; // Found content, exit early
211
232
}
212
233
}
213
234
( false , false ) => {
214
235
// Start and end token case
215
236
let result = extract_tool_call_content ( & json, start_token, end_token) ;
216
237
if let Some ( content) = result {
238
+ // Check if we found a start token but got empty JSON back
239
+ // This indicates the token was found but no valid JSON followed
240
+ if content. is_empty ( ) {
241
+ found_start_token_with_no_valid_json = true ;
242
+ }
243
+
217
244
json = content;
218
245
normal_text = new_normal_text;
246
+
219
247
break ; // Found content, exit early
220
248
}
221
249
}
@@ -304,7 +332,13 @@ pub fn try_tool_call_parse_basic_json(
304
332
return Ok ( ( results, Some ( normal_text) ) ) ;
305
333
}
306
334
307
- Ok ( ( vec ! [ ] , Some ( trimmed. to_string ( ) ) ) )
335
+ // If we found a start token but no valid JSON, return empty content
336
+ // to avoid leaking the token and invalid JSON content
337
+ if found_start_token_with_no_valid_json {
338
+ Ok ( ( vec ! [ ] , Some ( String :: new ( ) ) ) )
339
+ } else {
340
+ Ok ( ( vec ! [ ] , Some ( trimmed. to_string ( ) ) ) )
341
+ }
308
342
}
309
343
310
344
pub fn detect_tool_call_start_basic_json ( chunk : & str , config : & JsonParserConfig ) -> bool {
@@ -330,12 +364,22 @@ pub fn detect_tool_call_start_basic_json(chunk: &str, config: &JsonParserConfig)
330
364
return false ;
331
365
}
332
366
// Check if the chunk could be a prefix of this start token
333
- // We need to be careful to avoid false positives
334
367
// Handle Unicode character boundaries properly
335
368
for i in 1 ..=token. chars ( ) . count ( ) {
336
369
if let Some ( prefix) = token. chars ( ) . take ( i) . collect :: < String > ( ) . get ( ..) {
337
370
let prefix_str = & prefix[ ..prefix. len ( ) ] ;
338
- if trimmed == prefix_str || trimmed. ends_with ( prefix_str) {
371
+ // Check for exact prefix match
372
+ if trimmed == prefix_str {
373
+ return true ;
374
+ }
375
+ // For longer prefixes (3+ bytes, since `len()` counts bytes rather than chars), allow them anywhere in the input
376
+ // This allows "funny joke" to match "functools" via "fun"
377
+ // but prevents "<tool_call>" from matching "<TOOLCALL>" via single char "<"
378
+ if prefix_str. len ( ) >= 3 && trimmed. contains ( prefix_str) {
379
+ return true ;
380
+ }
381
+ // For shorter prefixes, only match if they're at the end (streaming scenario)
382
+ if prefix_str. len ( ) < 3 && trimmed. ends_with ( prefix_str) {
339
383
return true ;
340
384
}
341
385
}
0 commit comments