
Commit 5b836ef

Merge pull request #945 from MervinPraison/claude/issue-940-20250716-0711
fix: prevent premature termination in Ollama sequential tool execution
2 parents 536d98c + c816b59 commit 5b836ef

File tree

1 file changed (+224 -14):

  • src/praisonai-agents/praisonaiagents/llm/llm.py
src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 224 additions & 14 deletions
@@ -323,8 +323,8 @@ def _generate_ollama_tool_summary(self, tool_results: List[Any], response_text:
         if not (self._is_ollama_provider() and tool_results):
             return None
 
-        # If response is substantial, no summary needed
-        if response_text and len(response_text.strip()) > OLLAMA_MIN_RESPONSE_LENGTH:
+        # If response is a final answer, no summary needed
+        if self._is_final_answer(response_text, False, tool_results):
            return None
 
         # Build tool summary efficiently
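The old guard treated any reply longer than a fixed threshold as final, so a verbose acknowledgment could suppress the summary even though no real answer had been produced. A minimal sketch of that failure mode, assuming an illustrative value for OLLAMA_MIN_RESPONSE_LENGTH (the real constant is defined elsewhere in llm.py):

```python
# Illustrative only: the actual value of OLLAMA_MIN_RESPONSE_LENGTH in
# llm.py is assumed here.
OLLAMA_MIN_RESPONSE_LENGTH = 10

ack = "Sure, let me run those tools and get back to you."

# Old check: a chatty acknowledgment clears the length bar and is treated
# as a substantial response, so no summary is generated.
print(len(ack.strip()) > OLLAMA_MIN_RESPONSE_LENGTH)  # True

# New check: _is_final_answer() (added in the next hunk) also asks whether
# the text actually uses the tool output, so the same acknowledgment is
# treated as intermediate.
```

The replacement, _is_final_answer(), is added in the next hunk along with its supporting analysis helpers: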
@@ -349,6 +349,212 @@ def _format_ollama_tool_result_message(self, function_name: str, tool_result: An
             "content": f"The {function_name} function returned: {tool_result_str}"
         }
 
+    def _analyze_tool_chain_context(self, tool_results: List[Any], response_text: str) -> dict:
+        """
+        Analyze tool execution context to understand the current state of the tool chain.
+
+        Returns a context dictionary with confidence scores and patterns.
+        """
+        context = {
+            'tool_count': len(tool_results) if tool_results else 0,
+            'has_sequential_tools': False,
+            'has_final_computation': False,
+            'has_data_retrieval': False,
+            'confidence_score': 0.0,
+            'tool_names': [],
+            'tool_success_rate': 1.0
+        }
+
+        if not tool_results:
+            return context
+
+        # Extract tool names and analyze patterns
+        successful_tools = 0
+        for result in tool_results:
+            if isinstance(result, dict) and 'function_name' in result:
+                tool_name = result['function_name'].lower()
+                context['tool_names'].append(tool_name)
+
+                # Check if tool execution was successful
+                if 'error' not in result or not result.get('error'):
+                    successful_tools += 1
+
+                # Analyze tool types for patterns
+                if any(pattern in tool_name for pattern in ['get', 'fetch', 'search', 'retrieve', 'find']):
+                    context['has_data_retrieval'] = True
+
+                if any(pattern in tool_name for pattern in ['calculate', 'compute', 'multiply', 'add', 'sum', 'process']):
+                    context['has_final_computation'] = True
+
+        # Calculate success rate
+        if tool_results:
+            context['tool_success_rate'] = successful_tools / len(tool_results)
+
+        # Detect sequential tool usage patterns
+        if len(set(context['tool_names'])) > 1:
+            context['has_sequential_tools'] = True
+
+        # Calculate confidence score based on tool chain analysis
+        confidence = 0.0
+
+        # Sequential tools with final computation suggest completion
+        if context['has_sequential_tools'] and context['has_final_computation']:
+            confidence += 0.4
+
+        # Data retrieval followed by processing
+        if context['has_data_retrieval'] and context['has_final_computation']:
+            confidence += 0.3
+
+        # High success rate adds confidence
+        confidence += context['tool_success_rate'] * 0.2
+
+        # Multiple tools executed successfully
+        if context['tool_count'] >= 2 and context['tool_success_rate'] > 0.8:
+            confidence += 0.1
+
+        context['confidence_score'] = min(confidence, 1.0)
+        return context
+
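A standalone walkthrough of the scoring above, reproducing the method's arithmetic outside the class; the tool names are hypothetical but the dict shape ('function_name', optional 'error') matches what the method inspects:

```python
# Hypothetical two-tool chain: retrieval followed by computation.
tool_results = [
    {"function_name": "get_stock_price", "result": "100"},
    {"function_name": "multiply", "result": "200"},
]

tool_names = [r["function_name"].lower() for r in tool_results]
successful = sum(1 for r in tool_results if not r.get("error"))
success_rate = successful / len(tool_results)          # 1.0

has_data_retrieval = any(p in n for n in tool_names
                         for p in ("get", "fetch", "search", "retrieve", "find"))
has_final_computation = any(p in n for n in tool_names
                            for p in ("calculate", "compute", "multiply", "add", "sum", "process"))
has_sequential_tools = len(set(tool_names)) > 1        # two distinct tools

confidence = 0.0
if has_sequential_tools and has_final_computation:
    confidence += 0.4
if has_data_retrieval and has_final_computation:
    confidence += 0.3
confidence += success_rate * 0.2
if len(tool_results) >= 2 and success_rate > 0.8:
    confidence += 0.1

print(min(confidence, 1.0))  # 1.0 -> chain looks complete
```

With both a retrieval and a computation present, every bonus fires and the score saturates at 1.0. The diff continues with the response-quality counterpart: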
+    def _assess_response_quality(self, response_text: str, tool_results: List[Any]) -> dict:
+        """
+        Assess the quality and completeness of a response based on content analysis.
+
+        Returns quality metrics and confidence scores.
+        """
+        quality = {
+            'length': len(response_text.strip()) if response_text else 0,
+            'has_tool_references': False,
+            'has_conclusion_indicators': False,
+            'contains_results': False,
+            'quality_score': 0.0
+        }
+
+        if not response_text:
+            return quality
+
+        response_lower = response_text.lower().strip()
+
+        # Check for tool result integration
+        if tool_results:
+            tool_result_strings = [str(result) for result in tool_results if result]
+            for tool_result in tool_result_strings:
+                if tool_result and any(part in response_lower for part in str(tool_result).lower().split() if len(part) > 3):
+                    quality['has_tool_references'] = True
+                    break
+
+        # Check for conclusion indicators (dynamic pattern matching)
+        conclusion_indicators = ['therefore', 'so', 'result', 'answer', 'conclusion', 'final', 'total', 'summary']
+        quality['has_conclusion_indicators'] = any(indicator in response_lower for indicator in conclusion_indicators)
+
+        # Check if response contains actual results/data
+        if any(char.isdigit() for char in response_text) or '$' in response_text:
+            quality['contains_results'] = True
+
+        # Calculate quality score
+        score = 0.0
+
+        # Response length contributes to quality
+        if quality['length'] > 20:
+            score += 0.2
+        if quality['length'] > 50:
+            score += 0.1
+        if quality['length'] > 100:
+            score += 0.1
+
+        # Content quality indicators
+        if quality['has_tool_references']:
+            score += 0.3
+        if quality['has_conclusion_indicators']:
+            score += 0.2
+        if quality['contains_results']:
+            score += 0.1
+
+        quality['quality_score'] = min(score, 1.0)
+        return quality
+
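The same kind of walkthrough for the quality score, with a hypothetical response that echoes a word from the stringified tool result ("total"), which is what flips has_tool_references:

```python
# Hypothetical response and tool result.
response_text = "The stock price is 100, so the total after multiplying by 2 is $200."
tool_results = [{"function_name": "multiply", "result": "the total is 200"}]

response_lower = response_text.lower().strip()
length = len(response_text.strip())                    # 68

# Naive substring check: any word longer than 3 chars from the
# stringified tool result appearing in the response.
has_tool_references = any(
    part in response_lower
    for result in tool_results if result
    for part in str(result).lower().split() if len(part) > 3
)                                                      # True ("total")

indicators = ("therefore", "so", "result", "answer",
              "conclusion", "final", "total", "summary")
has_conclusion = any(i in response_lower for i in indicators)                        # True
contains_results = any(c.isdigit() for c in response_text) or "$" in response_text   # True

score = 0.0
score += 0.2 if length > 20 else 0.0
score += 0.1 if length > 50 else 0.0
score += 0.1 if length > 100 else 0.0
score += 0.3 if has_tool_references else 0.0
score += 0.2 if has_conclusion else 0.0
score += 0.1 if contains_results else 0.0

print(min(score, 1.0))  # 0.9
```

Next, the diff adds the decision method that consumes both scores: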
+    def _should_generate_tool_summary(self, tool_results: List[Any], response_text: str, iteration_count: int) -> bool:
+        """
+        Dynamically determine if a tool summary should be generated based on context analysis.
+
+        This replaces the hardcoded iteration_count >= 5 check with intelligent analysis.
+        """
+        # Analyze tool execution context
+        tool_context = self._analyze_tool_chain_context(tool_results, response_text)
+
+        # Assess response quality
+        response_quality = self._assess_response_quality(response_text, tool_results)
+
+        # Decision logic based on dynamic analysis
+        should_generate = False
+
+        # High confidence that tool chain is complete
+        if tool_context['confidence_score'] >= 0.7:
+            should_generate = True
+
+        # Good tool chain with quality response
+        elif tool_context['confidence_score'] >= 0.5 and response_quality['quality_score'] >= 0.6:
+            should_generate = True
+
+        # Sequential tools with final computation and good response
+        elif (tool_context['has_sequential_tools'] and
+              tool_context['has_final_computation'] and
+              response_quality['quality_score'] >= 0.4):
+            should_generate = True
+
+        # Safety fallback - prevent infinite loops (increased threshold)
+        elif iteration_count >= 7:
+            should_generate = True
+
+        return should_generate
+
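Putting the two scores together, a condensed, standalone restatement of the decision table above (class plumbing stripped out):

```python
# Inputs are the scores produced by the two helpers above.
def should_generate(confidence, quality, has_seq, has_comp, iteration_count):
    if confidence >= 0.7:                        # chain looks complete
        return True
    if confidence >= 0.5 and quality >= 0.6:     # decent chain + good reply
        return True
    if has_seq and has_comp and quality >= 0.4:  # retrieval -> computation
        return True
    return iteration_count >= 7                  # safety fallback

# With the scores from the walkthroughs above, the summary fires on the
# first iteration instead of waiting for a fixed count.
print(should_generate(1.0, 0.9, True, True, iteration_count=1))  # True
```

The last new method classifies a response as final or intermediate: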
+    def _is_final_answer(self, response_text: str, has_tool_calls: bool, tool_results: List[Any]) -> bool:
+        """
+        Determine if a response is a final answer or intermediate acknowledgment.
+
+        This method provides intelligent differentiation using dynamic analysis
+        instead of hardcoded patterns.
+
+        Args:
+            response_text: The text response from the LLM
+            has_tool_calls: Whether the response contains tool calls
+            tool_results: Results from executed tools
+
+        Returns:
+            True if this is a final answer, False if intermediate
+        """
+        if not response_text or not response_text.strip():
+            return False
+
+        # If response contains tool calls, it's likely not a final answer
+        if has_tool_calls:
+            return False
+
+        # For Ollama, use dynamic analysis instead of hardcoded patterns
+        if self._is_ollama_provider() and tool_results:
+            # Analyze tool chain context
+            tool_context = self._analyze_tool_chain_context(tool_results, response_text)
+
+            # Assess response quality
+            response_quality = self._assess_response_quality(response_text, tool_results)
+
+            # Dynamic decision based on context and quality
+            # If we have a complete tool chain with quality response, it's likely final
+            if (tool_context['confidence_score'] >= 0.6 and
+                response_quality['quality_score'] >= 0.5):
+                return True
+
+            # If response is very short and we have tool results, likely intermediate
+            if response_quality['length'] < 20:
+                return False
+
+            # If response doesn't reference tool results, likely intermediate
+            if not response_quality['has_tool_references'] and response_quality['length'] < 80:
+                return False
+
+        # For other providers, maintain existing behavior
+        # Substantial content (>10 chars) is considered final
+        return len(response_text.strip()) > 10
 
     def _process_stream_delta(self, delta, response_text: str, tool_calls: List[Dict], formatted_tools: Optional[List] = None) -> tuple:
         """
         Process a streaming delta chunk to extract content and tool calls.
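A standalone behavior sketch of the Ollama branch; the two False branches are what keep the loop alive on bare acknowledgments:

```python
# Mirrors the Ollama-specific branch order; for other providers the
# method falls through to the original >10-character check.
def is_final_ollama(confidence, quality_score, length, has_refs):
    if confidence >= 0.6 and quality_score >= 0.5:
        return True          # complete chain + quality response
    if length < 20:
        return False         # bare acknowledgment -> keep looping
    if not has_refs and length < 80:
        return False         # ignores the tool output -> keep looping
    return length > 10       # generic fallback

print(is_final_ollama(1.0, 0.9, 68, True))    # True  -> stop
print(is_final_ollama(0.3, 0.1, 5,  False))   # False -> continue
```

The two call sites in get_response and get_response_async are then updated to use these helpers: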
@@ -1102,17 +1308,19 @@ def get_response(
                     continue
 
                 # Check if the LLM provided a final answer alongside the tool calls
-                # If response_text contains substantive content, treat it as the final answer
-                if response_text and response_text.strip() and len(response_text.strip()) > 10:
+                # Use intelligent differentiation between intermediate and final responses
+                if self._is_final_answer(response_text, bool(tool_calls), tool_results):
                     # LLM provided a final answer after tool execution, don't continue
                     final_response_text = response_text.strip()
                     break
 
                 # Special handling for Ollama to prevent infinite loops
-                tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
-                if tool_summary:
-                    final_response_text = tool_summary
-                    break
+                # Use dynamic analysis instead of hardcoded iteration count
+                if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count):
+                    tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
+                    if tool_summary:
+                        final_response_text = tool_summary
+                        break
 
                 # Otherwise, continue the loop to check if more tools are needed
                 iteration_count += 1
@@ -1851,17 +2059,19 @@ async def get_response_async(
                     stored_reasoning_content = reasoning_content
 
                 # Check if the LLM provided a final answer alongside the tool calls
-                # If response_text contains substantive content, treat it as the final answer
-                if response_text and response_text.strip() and len(response_text.strip()) > 10:
+                # Use intelligent differentiation between intermediate and final responses
+                if self._is_final_answer(response_text, bool(tool_calls), tool_results):
                     # LLM provided a final answer after tool execution, don't continue
                     final_response_text = response_text.strip()
                     break
 
                 # Special handling for Ollama to prevent infinite loops
-                tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
-                if tool_summary:
-                    final_response_text = tool_summary
-                    break
+                # Use dynamic analysis instead of hardcoded iteration count
+                if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count):
+                    tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
+                    if tool_summary:
+                        final_response_text = tool_summary
+                        break
 
                 # Continue the loop to check if more tools are needed
                 iteration_count += 1
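Both call sites now gate the Ollama summary on the dynamic check. A toy trace of the loop's new behavior (all names hypothetical, not the library's API): under the old code, the chatty acknowledgment in round one exceeds 10 characters and would already have terminated the loop; the new check sees the pending tool call and keeps iterating until a substantive answer arrives.

```python
# Toy trace: each round is (response_text, tool_calls) from one LLM call.
rounds = [
    ("Let me look that up.", ["get_stock_price"]),   # acknowledgment + tool call
    ("", ["multiply"]),                              # silent intermediate round
    ("The total is $200.", []),                      # substantive final answer
]

final_response_text = None
for iteration_count, (response_text, tool_calls) in enumerate(rounds):
    # Stand-in for self._is_final_answer(): pending tool calls or
    # short/empty text mean the chain is still running.
    if response_text.strip() and not tool_calls and len(response_text.strip()) > 10:
        final_response_text = response_text.strip()
        break

print(final_response_text)  # The total is $200.
```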
