Commit 6675bfc

feat: enhance GPT OSS frontend with improved harmony tool calling parser and reasoning parser (#2999)
Signed-off-by: zhongdaor <[email protected]>
1 parent 20b7a8a commit 6675bfc

9 files changed (+219 −15 lines)


Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -81,4 +81,4 @@ opt-level = 3
 [profile.release]
 # These make the build much slower but shrink the binary, and could help performance
 codegen-units = 1
-lto = true
+lto = true

lib/llm/src/discovery/watcher.rs

Lines changed: 8 additions & 1 deletion
@@ -263,7 +263,14 @@ impl ModelWatcher {
         let client = component.endpoint(&endpoint_id.name).client().await?;
         let model_slug = model_entry.slug();
         let card = match ModelDeploymentCard::load_from_store(&model_slug, &self.drt).await {
-            Ok(Some(card)) => card,
+            Ok(Some(mut card)) => {
+                tracing::debug!(card.display_name, "adding model");
+                // Ensure runtime_config is populated
+                if let Some(rc) = model_entry.runtime_config.clone() {
+                    card.runtime_config = rc;
+                }
+                card
+            }
             Ok(None) => {
                 anyhow::bail!("Missing ModelDeploymentCard in storage under key {model_slug}");
             }
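
The effect of this change is that a runtime config carried by the discovered model entry now takes precedence over whatever was stored with the card. A minimal, self-contained sketch of that override pattern; the structs below are simplified stand-ins for the crate's real ModelDeploymentCard, ModelRuntimeConfig, and ModelEntry types, not their actual definitions:

// Sketch only: simplified stand-ins for the crate's real types.
#[derive(Clone, Debug, Default)]
struct ModelRuntimeConfig {
    reasoning_parser: Option<String>,
    tool_call_parser: Option<String>,
}

#[derive(Debug, Default)]
struct ModelDeploymentCard {
    display_name: String,
    runtime_config: ModelRuntimeConfig,
}

struct ModelEntry {
    runtime_config: Option<ModelRuntimeConfig>,
}

fn main() {
    let mut card = ModelDeploymentCard {
        display_name: "gpt-oss".into(),
        ..Default::default()
    };
    let entry = ModelEntry {
        runtime_config: Some(ModelRuntimeConfig {
            reasoning_parser: Some("gpt_oss".into()),
            tool_call_parser: Some("harmony".into()),
        }),
    };
    // Same shape as the watcher change: a config carried by the entry,
    // when present, overrides the one loaded from storage.
    if let Some(rc) = entry.runtime_config.clone() {
        card.runtime_config = rc;
    }
    assert_eq!(card.runtime_config.tool_call_parser.as_deref(), Some("harmony"));
    println!("{card:?}");
}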

lib/llm/src/preprocessor.rs

Lines changed: 8 additions & 0 deletions
@@ -160,6 +160,8 @@ pub struct OpenAIPreprocessor {
     formatter: Arc<dyn OAIPromptFormatter>,
     tokenizer: Arc<dyn Tokenizer>,
     model_info: Arc<dyn ModelInfo>,
+    /// Per-model runtime configuration propagated to the response generator (e.g., reasoning/tool parser)
+    runtime_config: crate::local_model::runtime_config::ModelRuntimeConfig,
     tool_call_parser: Option<String>,
 }

@@ -187,11 +189,15 @@ impl OpenAIPreprocessor {
         let model_info = model_info.get_model_info()?;
         let tool_call_parser = mdc.runtime_config.tool_call_parser.clone();

+        // Initialize runtime config from the ModelDeploymentCard
+        let runtime_config = mdc.runtime_config.clone();
+
         Ok(Arc::new(Self {
             formatter,
             tokenizer,
             model_info,
             mdcsum,
+            runtime_config,
             tool_call_parser,
         }))
     }

@@ -948,6 +954,8 @@ impl
         let response_generator = request.response_generator(context.id().to_string());
         let mut response_generator = Box::new(response_generator);

+        // Set the runtime configuration on the response generator
+        response_generator.set_reasoning_parser(self.runtime_config.clone());
         let enable_tool_calling =
             maybe_enable_tool_call(self.tool_call_parser.as_deref(), &request);
         // convert the chat completion request to a common completion request
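
Taken together, the flow is: the preprocessor clones the card's runtime config once at construction, then pushes it into each request's response generator before streaming starts (the generator's `set_reasoning_parser` is defined in the delta.rs change below). A hedged sketch of that wiring with hypothetical stand-in types; the real OpenAIPreprocessor and DeltaGenerator carry much more state:

#[derive(Clone, Default)]
struct ModelRuntimeConfig {
    reasoning_parser: Option<String>,
}

#[derive(Default)]
struct DeltaGenerator {
    reasoning_parser: Option<String>,
}

impl DeltaGenerator {
    fn set_reasoning_parser(&mut self, runtime_config: ModelRuntimeConfig) {
        self.reasoning_parser = runtime_config.reasoning_parser;
    }
}

struct OpenAIPreprocessor {
    // Cloned once from the ModelDeploymentCard when the preprocessor is built.
    runtime_config: ModelRuntimeConfig,
}

impl OpenAIPreprocessor {
    // Stand-in for the per-request path in the hunk above.
    fn make_response_generator(&self) -> DeltaGenerator {
        let mut generator = DeltaGenerator::default();
        generator.set_reasoning_parser(self.runtime_config.clone());
        generator
    }
}

fn main() {
    let pre = OpenAIPreprocessor {
        runtime_config: ModelRuntimeConfig {
            reasoning_parser: Some("gpt_oss".into()),
        },
    };
    let generator = pre.make_response_generator();
    assert_eq!(generator.reasoning_parser.as_deref(), Some("gpt_oss"));
}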

lib/llm/src/protocols/openai/chat_completions/delta.rs

Lines changed: 14 additions & 0 deletions
@@ -125,6 +125,20 @@ impl DeltaGenerator {
         }
     }

+    /// Update runtime configuration and reconfigure the reasoning parser accordingly.
+    pub fn set_reasoning_parser(&mut self, runtime_config: ModelRuntimeConfig) {
+        self.options.runtime_config = runtime_config.clone();
+        match self.options.runtime_config.reasoning_parser.as_deref() {
+            Some(name) => {
+                self.reasoning_parser =
+                    Some(ReasoningParserType::get_reasoning_parser_from_name(name));
+            }
+            None => {
+                self.reasoning_parser = None;
+            }
+        }
+    }
+
     /// Updates the prompt token usage count.
     ///
     /// # Arguments
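
Note the None arm: clearing reasoning_parser in the runtime config disables reasoning parsing entirely rather than leaving a stale parser in place. A small sketch of the same selection logic, with ReasoningParserKind standing in for the crate's ReasoningParserType and "gpt_oss" as an assumed parser name:

#[derive(Clone, Default)]
struct ModelRuntimeConfig {
    reasoning_parser: Option<String>,
}

#[derive(Debug, PartialEq)]
enum ReasoningParserKind {
    GptOss,
    Basic,
}

impl ReasoningParserKind {
    // Stand-in for ReasoningParserType::get_reasoning_parser_from_name;
    // the real name-to-parser mapping lives in lib/parsers.
    fn from_name(name: &str) -> Self {
        match name {
            "gpt_oss" => Self::GptOss,
            _ => Self::Basic,
        }
    }
}

#[derive(Default)]
struct DeltaGenerator {
    reasoning_parser: Option<ReasoningParserKind>,
}

impl DeltaGenerator {
    fn set_reasoning_parser(&mut self, runtime_config: ModelRuntimeConfig) {
        // A configured name selects a parser; no name disables reasoning parsing.
        self.reasoning_parser = runtime_config
            .reasoning_parser
            .as_deref()
            .map(ReasoningParserKind::from_name);
    }
}

fn main() {
    let mut g = DeltaGenerator::default();
    g.set_reasoning_parser(ModelRuntimeConfig {
        reasoning_parser: Some("gpt_oss".into()),
    });
    assert_eq!(g.reasoning_parser, Some(ReasoningParserKind::GptOss));

    g.set_reasoning_parser(ModelRuntimeConfig::default());
    assert_eq!(g.reasoning_parser, None);
}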

lib/parsers/src/reasoning/gpt_oss_parser.rs

Lines changed: 60 additions & 3 deletions
@@ -150,7 +150,7 @@ impl ReasoningParser for GptOssReasoningParser {

     fn parse_reasoning_streaming_incremental(
         &mut self,
-        _text: &str,
+        text: &str,
         token_ids: &[u32],
     ) -> ParserResult {
         tracing::debug!(

@@ -173,9 +173,8 @@ impl ReasoningParser for GptOssReasoningParser {
         }

         if let Some(channel) = self.parser.current_channel() {
-            tracing::debug!("Current channel: {}", channel);
+            tracing::debug!("Current channel {}", channel);
             if channel == "final" {
-                tracing::debug!("In final channel, processing normal text");
                 // If we're in the final channel, we should not parse reasoning
                 if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
                     tracing::debug!("Got normal text delta of {} chars", current.len());

@@ -186,6 +185,64 @@ impl ReasoningParser for GptOssReasoningParser {
                 }
                 tracing::debug!("No content delta in final channel");
                 ParserResult::default()
+            } else if channel == "commentary" {
+                // If we're in the commentary channel, return the raw token content and recover content consumed by the parser
+                // so that the tool parser can process it properly.
+                if let Ok(enc) = get_harmony_encoding() {
+                    let current_content = self.parser.current_content().unwrap_or_default();
+                    let mut final_text = text.to_string();
+
+                    // Restore commentary metadata consumed by the parser so the tool-call parser can
+                    // process it correctly.
+                    //
+                    // Example:
+                    // Before parsing:
+                    // "<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{\"format\":\"celsius\",\"location\":\"San Francisco\"}<|call|>"
+                    // After parsing, the header is stripped, so we must reconstruct it:
+                    // "<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>"
+                    //
+                    // This ensures downstream tool-call parsing receives the channel, target, and
+                    // constraint metadata together with the message payload.
+
+                    // Recovery should only happen once, and only when `current_content` is empty.
+                    if current_content.is_empty() {
+                        let tokens = self.parser.tokens();
+
+                        // Get the token id for "<|channel|>"
+                        let channel_token_id = enc
+                            .tokenizer()
+                            .encode_with_special_tokens("<|channel|>")
+                            .last()
+                            .copied();
+
+                        // Find the last occurrence of the <|channel|> token (id 20005) in the tokens vector
+                        let last_channel_token_idx = channel_token_id
+                            .and_then(|token_id| {
+                                tokens.iter().rposition(|token| *token == token_id)
+                            })
+                            .unwrap_or(0);
+
+                        // Then get the generated text from the last <|channel|> to the end of self.parser.tokens()
+                        let end_token_idx = self.parser.tokens().len();
+                        // Use Harmony's decode_utf8 to decode tokens into text
+                        let generated_text = enc
+                            .tokenizer()
+                            .decode_utf8(
+                                &self.parser.tokens()[last_channel_token_idx..end_token_idx],
+                            )
+                            .unwrap_or_default();
+
+                        final_text = generated_text;
+                    }
+
+                    ParserResult {
+                        normal_text: final_text,
+                        reasoning_text: String::new(),
+                    }
+                } else {
+                    tracing::warn!("Failed to get harmony encoding for raw token decoding");
+                    ParserResult::default()
+                }
             } else {
                 tracing::debug!("In reasoning channel: {}", channel);
                 if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
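
Stripped of the Harmony types, the recovery step is just index arithmetic over the parser's token buffer: find the last occurrence of the <|channel|> marker token and take everything from there to the end. A standalone sketch of only that piece; the token ids below are invented, with 20005 echoing the id mentioned in the comment above:

fn main() {
    // Hypothetical token stream; 20005 stands in for the <|channel|> marker.
    let channel_token_id: u32 = 20005;
    let tokens: Vec<u32> = vec![11, 22, 20005, 33, 44, 20005, 55, 66];

    // Find the last <|channel|> marker, falling back to the start of the
    // buffer when it is absent, just like the unwrap_or(0) in the diff.
    let last_channel_token_idx = tokens
        .iter()
        .rposition(|&t| t == channel_token_id)
        .unwrap_or(0);

    // Everything from the marker to the end of the buffer is what gets
    // re-decoded and handed to the tool-call parser as `final_text`.
    let recovered = &tokens[last_channel_token_idx..];
    assert_eq!(recovered, &[20005, 55, 66][..]);
}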

lib/parsers/src/tool_calling/harmony/harmony_parser.rs

Lines changed: 120 additions & 2 deletions
@@ -3,9 +3,10 @@

 use super::config::JsonParserConfig;
 use super::response::{CalledFunction, ToolCallResponse, ToolCallType};
-use openai_harmony::StreamableParser;
 use openai_harmony::chat::{Content::Text, Role};
-use openai_harmony::{HarmonyEncoding, HarmonyEncodingName, load_harmony_encoding};
+use openai_harmony::{
+    HarmonyEncoding, HarmonyEncodingName, StreamableParser, load_harmony_encoding,
+};
 use serde_json::Value;

 static GLOBAL_HARMONY_GPTOSS_ENCODING: tokio::sync::OnceCell<

@@ -162,6 +163,109 @@ pub async fn parse_tool_calls_harmony(
     Ok((res, Some(normal_text.to_string())))
 }

+/// Parse tool calls from a complete Harmony Format text chunk using direct token parsing.
+///
+/// This function is optimized for parsing complete text chunks where the entire content
+/// is available at once. It uses `parse_messages_from_completion_tokens` to directly
+/// parse all tokens into Harmony Format messages, then extracts tool calls from messages
+/// with the "commentary" channel and "functions.*" recipients.
+///
+/// Unlike `parse_tool_calls_harmony`, this function doesn't perform start-token detection
+/// or token-by-token streaming, making it more efficient for complete chunks.
+///
+/// # Arguments
+/// * `text` - The full Harmony-format string to be parsed, excluding any trailing stop tokens.
+///   Example:
+///   `<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco"}`
+/// * `_config` - Parser configuration (currently unused but kept for API consistency)
+///
+/// # Returns
+/// * `Ok((tool_calls, normal_text))` - Tuple containing extracted tool calls and any normal text
+/// * `Err(e)` - If parsing fails due to encoding or tokenization errors
+pub async fn parse_tool_calls_harmony_complete(
+    text: &str,
+    _config: &JsonParserConfig,
+) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
+    let enc = match get_harmony_encoding().await.as_ref() {
+        Ok(e) => e,
+        Err(e) => {
+            tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
+            return Ok((vec![], Some(text.to_string())));
+        }
+    };
+
+    // Encode the text into tokens using the harmony encoding
+    let tokens: Vec<u32> = enc.tokenizer().encode_with_special_tokens(text);
+    let messages = match enc.parse_messages_from_completion_tokens(tokens, Some(Role::Assistant)) {
+        Ok(messages) => messages,
+        Err(e) => {
+            tracing::debug!(
+                "Failed to parse messages from completion tokens: {e}. Tool calls will not be parsed."
+            );
+            return Ok((vec![], Some(text.to_string())));
+        }
+    };
+
+    let mut normal_text = String::new();
+
+    let mut res = Vec::with_capacity(messages.len());
+    let mut call_idx = 0; // Index of the tool call
+
+    for message in messages.iter() {
+        if message.author.role != Role::Assistant {
+            continue;
+        }
+
+        let channel = message.channel.as_deref();
+        let recipient = message.recipient.as_deref().unwrap_or_default();
+
+        // Handle the commentary channel
+        if channel == Some("commentary") && recipient.starts_with("functions.") {
+            let Some(fname) = message
+                .recipient
+                .as_ref()
+                .and_then(|r| r.split('.').nth(1))
+                .filter(|s| !s.is_empty())
+                .map(|s| s.to_string())
+            else {
+                continue;
+            };
+
+            let args = match message.content.first() {
+                Some(Text(text)) => match serde_json::from_str::<Value>(text.text.trim()) {
+                    Ok(value) => value,
+                    Err(_) => {
+                        Value::Null // Set args to null if it's not valid JSON
+                    }
+                },
+                _ => {
+                    Value::Null // Set args to null if it's not text content
+                }
+            };
+            // Add the tool call to the result only if args is valid JSON
+            if !args.is_null() {
+                call_idx += 1;
+                res.push(ToolCallResponse {
+                    id: format!("call-{}", call_idx),
+                    tp: ToolCallType::Function,
+                    function: CalledFunction {
+                        name: fname.to_string(),
+                        // Safety: serializing a `serde_json::Value` cannot fail
+                        arguments: serde_json::to_string(&args).unwrap(),
+                    },
+                });
+            }
+        // Handle the reasoning (analysis) channel
+        } else if channel == Some("analysis") {
+            normal_text.push_str(match &message.content[0] {
+                Text(t) => &t.text,
+                _ => "",
+            });
+        }
+    }
+    Ok((res, Some(normal_text.to_string())))
+}
+
 pub fn detect_tool_call_start_harmony(
     chunk: &str,
     config: &JsonParserConfig,

@@ -266,6 +370,20 @@ mod tests {
         assert_eq!(args["location"], "San Francisco");
     }

+    #[tokio::test]
+    async fn test_parse_tool_calls_harmony_complete_basic() {
+        let text = r#"<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"format":"celsius","location":"San Francisco"}"#;
+        let (tool_calls, normal_content) =
+            parse_tool_calls_harmony_complete(text, &Default::default())
+                .await
+                .unwrap();
+        assert_eq!(normal_content, Some("".to_string()));
+        let (name, args) = extract_name_and_args(tool_calls[0].clone());
+        assert_eq!(name, "get_current_weather");
+        assert_eq!(args["location"], "San Francisco");
+        assert_eq!(args["format"], "celsius");
+    }
+
     #[tokio::test]
     async fn test_parse_tools_harmony_without_start_token() {
         let text = r#"
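
One detail worth calling out in the new function: the tool name is derived from the message recipient, so "functions.get_current_weather" yields "get_current_weather", and anything outside the functions namespace is skipped. That extraction, reduced to a standalone sketch mirroring the split('.').nth(1) logic above (the helper name is invented for illustration):

/// Extract the function name from a Harmony recipient such as
/// "functions.get_current_weather". Mirrors the diff's split('.').nth(1)
/// plus the namespace and empty-name checks.
fn function_name(recipient: &str) -> Option<&str> {
    recipient
        .starts_with("functions.")
        .then(|| recipient.split('.').nth(1))
        .flatten()
        .filter(|s| !s.is_empty())
}

fn main() {
    assert_eq!(
        function_name("functions.get_current_weather"),
        Some("get_current_weather")
    );
    assert_eq!(function_name("functions."), None);
    assert_eq!(function_name("browser.search"), None);
}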

lib/parsers/src/tool_calling/harmony/mod.rs

Lines changed: 3 additions & 1 deletion
@@ -4,4 +4,6 @@
 pub mod harmony_parser;

 pub use super::{config, response};
-pub use harmony_parser::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
+pub use harmony_parser::{
+    detect_tool_call_start_harmony, parse_tool_calls_harmony, parse_tool_calls_harmony_complete,
+};

lib/parsers/src/tool_calling/mod.rs

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ pub mod tools;

 // Re-export main types and functions for convenience
 pub use config::{JsonParserConfig, ToolCallConfig, ToolCallParserType};
-pub use harmony::parse_tool_calls_harmony;
+pub use harmony::{parse_tool_calls_harmony, parse_tool_calls_harmony_complete};
 pub use json::try_tool_call_parse_json;
 pub use parsers::{detect_and_parse_tool_call, try_tool_call_parse};
 pub use pythonic::try_tool_call_parse_pythonic;

lib/parsers/src/tool_calling/parsers.rs

Lines changed: 4 additions & 6 deletions
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0

 use super::config::{ToolCallConfig, ToolCallParserType};
-use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
+use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony_complete};
 use super::json::{detect_tool_call_start_json, try_tool_call_parse_json};
 use super::pythonic::{detect_tool_call_start_pythonic, try_tool_call_parse_pythonic};
 use super::response::ToolCallResponse;

@@ -43,7 +43,8 @@ pub async fn try_tool_call_parse(
             Ok((results, normal_content))
         }
         ToolCallParserType::Harmony => {
-            let (results, normal_content) = parse_tool_calls_harmony(message, &config.json).await?;
+            let (results, normal_content) =
+                parse_tool_calls_harmony_complete(message, &config.json).await?;
             Ok((results, normal_content))
         }
         ToolCallParserType::Pythonic => {

@@ -1450,10 +1451,7 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
     #[tokio::test]
     async fn test_harmony_parser_basic() {
         let input = r#"
-<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
-<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
-<|message|>{"location":"San Francisco", "unit":"fahrenheit"}<|call|>
-"#;
+<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|><|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco", "unit":"fahrenheit"}"#;
         let (result, content) = detect_and_parse_tool_call(input, Some("harmony"))
             .await
             .unwrap();
