
Commit 39b0a64

Merge pull request #953 from MervinPraison/revert-945-claude/issue-940-20250716-0711
Revert "fix: prevent premature termination in Ollama sequential tool execution"
2 parents 5b836ef + 86a5f31 commit 39b0a64

File tree

1 file changed: +14 -224 lines changed

  • src/praisonai-agents/praisonaiagents/llm/llm.py


src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 14 additions & 224 deletions
@@ -323,8 +323,8 @@ def _generate_ollama_tool_summary(self, tool_results: List[Any], response_text:
         if not (self._is_ollama_provider() and tool_results):
             return None
 
-        # If response is a final answer, no summary needed
-        if self._is_final_answer(response_text, False, tool_results):
+        # If response is substantial, no summary needed
+        if response_text and len(response_text.strip()) > OLLAMA_MIN_RESPONSE_LENGTH:
             return None
 
         # Build tool summary efficiently
@@ -349,212 +349,6 @@ def _format_ollama_tool_result_message(self, function_name: str, tool_result: Any
             "content": f"The {function_name} function returned: {tool_result_str}"
         }
 
-    def _analyze_tool_chain_context(self, tool_results: List[Any], response_text: str) -> dict:
-        """
-        Analyze tool execution context to understand the current state of the tool chain.
-
-        Returns a context dictionary with confidence scores and patterns.
-        """
-        context = {
-            'tool_count': len(tool_results) if tool_results else 0,
-            'has_sequential_tools': False,
-            'has_final_computation': False,
-            'has_data_retrieval': False,
-            'confidence_score': 0.0,
-            'tool_names': [],
-            'tool_success_rate': 1.0
-        }
-
-        if not tool_results:
-            return context
-
-        # Extract tool names and analyze patterns
-        successful_tools = 0
-        for result in tool_results:
-            if isinstance(result, dict) and 'function_name' in result:
-                tool_name = result['function_name'].lower()
-                context['tool_names'].append(tool_name)
-
-                # Check if tool execution was successful
-                if 'error' not in result or not result.get('error'):
-                    successful_tools += 1
-
-                # Analyze tool types for patterns
-                if any(pattern in tool_name for pattern in ['get', 'fetch', 'search', 'retrieve', 'find']):
-                    context['has_data_retrieval'] = True
-
-                if any(pattern in tool_name for pattern in ['calculate', 'compute', 'multiply', 'add', 'sum', 'process']):
-                    context['has_final_computation'] = True
-
-        # Calculate success rate
-        if tool_results:
-            context['tool_success_rate'] = successful_tools / len(tool_results)
-
-        # Detect sequential tool usage patterns
-        if len(set(context['tool_names'])) > 1:
-            context['has_sequential_tools'] = True
-
-        # Calculate confidence score based on tool chain analysis
-        confidence = 0.0
-
-        # Sequential tools with final computation suggest completion
-        if context['has_sequential_tools'] and context['has_final_computation']:
-            confidence += 0.4
-
-        # Data retrieval followed by processing
-        if context['has_data_retrieval'] and context['has_final_computation']:
-            confidence += 0.3
-
-        # High success rate adds confidence
-        confidence += context['tool_success_rate'] * 0.2
-
-        # Multiple tools executed successfully
-        if context['tool_count'] >= 2 and context['tool_success_rate'] > 0.8:
-            confidence += 0.1
-
-        context['confidence_score'] = min(confidence, 1.0)
-        return context
-
-    def _assess_response_quality(self, response_text: str, tool_results: List[Any]) -> dict:
-        """
-        Assess the quality and completeness of a response based on content analysis.
-
-        Returns quality metrics and confidence scores.
-        """
-        quality = {
-            'length': len(response_text.strip()) if response_text else 0,
-            'has_tool_references': False,
-            'has_conclusion_indicators': False,
-            'contains_results': False,
-            'quality_score': 0.0
-        }
-
-        if not response_text:
-            return quality
-
-        response_lower = response_text.lower().strip()
-
-        # Check for tool result integration
-        if tool_results:
-            tool_result_strings = [str(result) for result in tool_results if result]
-            for tool_result in tool_result_strings:
-                if tool_result and any(part in response_lower for part in str(tool_result).lower().split() if len(part) > 3):
-                    quality['has_tool_references'] = True
-                    break
-
-        # Check for conclusion indicators (dynamic pattern matching)
-        conclusion_indicators = ['therefore', 'so', 'result', 'answer', 'conclusion', 'final', 'total', 'summary']
-        quality['has_conclusion_indicators'] = any(indicator in response_lower for indicator in conclusion_indicators)
-
-        # Check if response contains actual results/data
-        if any(char.isdigit() for char in response_text) or '$' in response_text:
-            quality['contains_results'] = True
-
-        # Calculate quality score
-        score = 0.0
-
-        # Response length contributes to quality
-        if quality['length'] > 20:
-            score += 0.2
-        if quality['length'] > 50:
-            score += 0.1
-        if quality['length'] > 100:
-            score += 0.1
-
-        # Content quality indicators
-        if quality['has_tool_references']:
-            score += 0.3
-        if quality['has_conclusion_indicators']:
-            score += 0.2
-        if quality['contains_results']:
-            score += 0.1
-
-        quality['quality_score'] = min(score, 1.0)
-        return quality
-
-    def _should_generate_tool_summary(self, tool_results: List[Any], response_text: str, iteration_count: int) -> bool:
-        """
-        Dynamically determine if a tool summary should be generated based on context analysis.
-
-        This replaces the hardcoded iteration_count >= 5 check with intelligent analysis.
-        """
-        # Analyze tool execution context
-        tool_context = self._analyze_tool_chain_context(tool_results, response_text)
-
-        # Assess response quality
-        response_quality = self._assess_response_quality(response_text, tool_results)
-
-        # Decision logic based on dynamic analysis
-        should_generate = False
-
-        # High confidence that tool chain is complete
-        if tool_context['confidence_score'] >= 0.7:
-            should_generate = True
-
-        # Good tool chain with quality response
-        elif tool_context['confidence_score'] >= 0.5 and response_quality['quality_score'] >= 0.6:
-            should_generate = True
-
-        # Sequential tools with final computation and good response
-        elif (tool_context['has_sequential_tools'] and
-              tool_context['has_final_computation'] and
-              response_quality['quality_score'] >= 0.4):
-            should_generate = True
-
-        # Safety fallback - prevent infinite loops (increased threshold)
-        elif iteration_count >= 7:
-            should_generate = True
-
-        return should_generate
-
-    def _is_final_answer(self, response_text: str, has_tool_calls: bool, tool_results: List[Any]) -> bool:
-        """
-        Determine if a response is a final answer or intermediate acknowledgment.
-
-        This method provides intelligent differentiation using dynamic analysis
-        instead of hardcoded patterns.
-
-        Args:
-            response_text: The text response from the LLM
-            has_tool_calls: Whether the response contains tool calls
-            tool_results: Results from executed tools
-
-        Returns:
-            True if this is a final answer, False if intermediate
-        """
-        if not response_text or not response_text.strip():
-            return False
-
-        # If response contains tool calls, it's likely not a final answer
-        if has_tool_calls:
-            return False
-
-        # For Ollama, use dynamic analysis instead of hardcoded patterns
-        if self._is_ollama_provider() and tool_results:
-            # Analyze tool chain context
-            tool_context = self._analyze_tool_chain_context(tool_results, response_text)
-
-            # Assess response quality
-            response_quality = self._assess_response_quality(response_text, tool_results)
-
-            # Dynamic decision based on context and quality
-            # If we have a complete tool chain with quality response, it's likely final
-            if (tool_context['confidence_score'] >= 0.6 and
-                response_quality['quality_score'] >= 0.5):
-                return True
-
-            # If response is very short and we have tool results, likely intermediate
-            if response_quality['length'] < 20:
-                return False
-
-            # If response doesn't reference tool results, likely intermediate
-            if not response_quality['has_tool_references'] and response_quality['length'] < 80:
-                return False
-
-        # For other providers, maintain existing behavior
-        # Substantial content (>10 chars) is considered final
-        return len(response_text.strip()) > 10
-
     def _process_stream_delta(self, delta, response_text: str, tool_calls: List[Dict], formatted_tools: Optional[List] = None) -> tuple:
         """
         Process a streaming delta chunk to extract content and tool calls.
@@ -1308,19 +1102,17 @@ def get_response(
                     continue
 
                 # Check if the LLM provided a final answer alongside the tool calls
-                # Use intelligent differentiation between intermediate and final responses
-                if self._is_final_answer(response_text, bool(tool_calls), tool_results):
+                # If response_text contains substantive content, treat it as the final answer
+                if response_text and response_text.strip() and len(response_text.strip()) > 10:
                     # LLM provided a final answer after tool execution, don't continue
                     final_response_text = response_text.strip()
                     break
 
                 # Special handling for Ollama to prevent infinite loops
-                # Use dynamic analysis instead of hardcoded iteration count
-                if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count):
-                    tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
-                    if tool_summary:
-                        final_response_text = tool_summary
-                        break
+                tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
+                if tool_summary:
+                    final_response_text = tool_summary
+                    break
 
                 # Otherwise, continue the loop to check if more tools are needed
                 iteration_count += 1
@@ -2059,19 +1851,17 @@ async def get_response_async(
                     stored_reasoning_content = reasoning_content
 
                 # Check if the LLM provided a final answer alongside the tool calls
-                # Use intelligent differentiation between intermediate and final responses
-                if self._is_final_answer(response_text, bool(tool_calls), tool_results):
+                # If response_text contains substantive content, treat it as the final answer
+                if response_text and response_text.strip() and len(response_text.strip()) > 10:
                     # LLM provided a final answer after tool execution, don't continue
                     final_response_text = response_text.strip()
                     break
 
                 # Special handling for Ollama to prevent infinite loops
-                # Use dynamic analysis instead of hardcoded iteration count
-                if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count):
-                    tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
-                    if tool_summary:
-                        final_response_text = tool_summary
-                        break
+                tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
+                if tool_summary:
+                    final_response_text = tool_summary
+                    break
 
                 # Continue the loop to check if more tools are needed
                 iteration_count += 1
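After this revert, the loop-exit logic in the last two hunks reduces to a bare length check on the response, followed by the Ollama summary fallback. Below is a minimal, self-contained Python sketch of that post-revert flow. The helper names are adapted from the diff (module-level functions standing in for the `self._...` methods); the value of OLLAMA_MIN_RESPONSE_LENGTH and the summary string format are assumptions for illustration, since the diff truncates the summary-building code.

from typing import Any, List, Optional

OLLAMA_MIN_RESPONSE_LENGTH = 40  # assumed value; the real constant is defined elsewhere in llm.py


def generate_ollama_tool_summary(is_ollama: bool, tool_results: List[Any],
                                 response_text: str) -> Optional[str]:
    """Mirrors the guard clauses of _generate_ollama_tool_summary after the revert."""
    if not (is_ollama and tool_results):
        return None
    # If response is substantial, no summary needed (first hunk above)
    if response_text and len(response_text.strip()) > OLLAMA_MIN_RESPONSE_LENGTH:
        return None
    # Build tool summary (format assumed; the diff elides this part)
    return "Tool results: " + "; ".join(str(r) for r in tool_results)


def decide_exit(is_ollama: bool, response_text: str,
                tool_results: List[Any]) -> Optional[str]:
    """Return the final text if the tool-call loop should stop, else None to iterate."""
    # Any substantive content (> 10 chars) is treated as the final answer.
    if response_text and response_text.strip() and len(response_text.strip()) > 10:
        return response_text.strip()
    # Ollama-specific guard against infinite loops: fall back to a tool summary.
    return generate_ollama_tool_summary(is_ollama, tool_results, response_text)


# Example: a short Ollama acknowledgment no longer keeps looping; it exits with a summary.
print(decide_exit(True, "Okay.", [{"function_name": "get_stock_price", "result": 42}]))

Note the trade-off the revert accepts: any response longer than 10 characters ends the loop, even if the model intended to chain further tool calls, which is the behavior the reverted PR #945 had tried to change.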
