diff --git a/src/praisonai-agents/praisonaiagents/llm/llm.py b/src/praisonai-agents/praisonaiagents/llm/llm.py index 4c46314e0..d9e093ab9 100644 --- a/src/praisonai-agents/praisonaiagents/llm/llm.py +++ b/src/praisonai-agents/praisonaiagents/llm/llm.py @@ -323,8 +323,8 @@ def _generate_ollama_tool_summary(self, tool_results: List[Any], response_text: if not (self._is_ollama_provider() and tool_results): return None - # If response is a final answer, no summary needed - if self._is_final_answer(response_text, False, tool_results): + # If response is substantial, no summary needed + if response_text and len(response_text.strip()) > OLLAMA_MIN_RESPONSE_LENGTH: return None # Build tool summary efficiently @@ -349,212 +349,6 @@ def _format_ollama_tool_result_message(self, function_name: str, tool_result: An "content": f"The {function_name} function returned: {tool_result_str}" } - def _analyze_tool_chain_context(self, tool_results: List[Any], response_text: str) -> dict: - """ - Analyze tool execution context to understand the current state of the tool chain. - - Returns a context dictionary with confidence scores and patterns. - """ - context = { - 'tool_count': len(tool_results) if tool_results else 0, - 'has_sequential_tools': False, - 'has_final_computation': False, - 'has_data_retrieval': False, - 'confidence_score': 0.0, - 'tool_names': [], - 'tool_success_rate': 1.0 - } - - if not tool_results: - return context - - # Extract tool names and analyze patterns - successful_tools = 0 - for result in tool_results: - if isinstance(result, dict) and 'function_name' in result: - tool_name = result['function_name'].lower() - context['tool_names'].append(tool_name) - - # Check if tool execution was successful - if 'error' not in result or not result.get('error'): - successful_tools += 1 - - # Analyze tool types for patterns - if any(pattern in tool_name for pattern in ['get', 'fetch', 'search', 'retrieve', 'find']): - context['has_data_retrieval'] = True - - if any(pattern in tool_name for pattern in ['calculate', 'compute', 'multiply', 'add', 'sum', 'process']): - context['has_final_computation'] = True - - # Calculate success rate - if tool_results: - context['tool_success_rate'] = successful_tools / len(tool_results) - - # Detect sequential tool usage patterns - if len(set(context['tool_names'])) > 1: - context['has_sequential_tools'] = True - - # Calculate confidence score based on tool chain analysis - confidence = 0.0 - - # Sequential tools with final computation suggest completion - if context['has_sequential_tools'] and context['has_final_computation']: - confidence += 0.4 - - # Data retrieval followed by processing - if context['has_data_retrieval'] and context['has_final_computation']: - confidence += 0.3 - - # High success rate adds confidence - confidence += context['tool_success_rate'] * 0.2 - - # Multiple tools executed successfully - if context['tool_count'] >= 2 and context['tool_success_rate'] > 0.8: - confidence += 0.1 - - context['confidence_score'] = min(confidence, 1.0) - return context - - def _assess_response_quality(self, response_text: str, tool_results: List[Any]) -> dict: - """ - Assess the quality and completeness of a response based on content analysis. - - Returns quality metrics and confidence scores. - """ - quality = { - 'length': len(response_text.strip()) if response_text else 0, - 'has_tool_references': False, - 'has_conclusion_indicators': False, - 'contains_results': False, - 'quality_score': 0.0 - } - - if not response_text: - return quality - - response_lower = response_text.lower().strip() - - # Check for tool result integration - if tool_results: - tool_result_strings = [str(result) for result in tool_results if result] - for tool_result in tool_result_strings: - if tool_result and any(part in response_lower for part in str(tool_result).lower().split() if len(part) > 3): - quality['has_tool_references'] = True - break - - # Check for conclusion indicators (dynamic pattern matching) - conclusion_indicators = ['therefore', 'so', 'result', 'answer', 'conclusion', 'final', 'total', 'summary'] - quality['has_conclusion_indicators'] = any(indicator in response_lower for indicator in conclusion_indicators) - - # Check if response contains actual results/data - if any(char.isdigit() for char in response_text) or '$' in response_text: - quality['contains_results'] = True - - # Calculate quality score - score = 0.0 - - # Response length contributes to quality - if quality['length'] > 20: - score += 0.2 - if quality['length'] > 50: - score += 0.1 - if quality['length'] > 100: - score += 0.1 - - # Content quality indicators - if quality['has_tool_references']: - score += 0.3 - if quality['has_conclusion_indicators']: - score += 0.2 - if quality['contains_results']: - score += 0.1 - - quality['quality_score'] = min(score, 1.0) - return quality - - def _should_generate_tool_summary(self, tool_results: List[Any], response_text: str, iteration_count: int) -> bool: - """ - Dynamically determine if a tool summary should be generated based on context analysis. - - This replaces the hardcoded iteration_count >= 5 check with intelligent analysis. - """ - # Analyze tool execution context - tool_context = self._analyze_tool_chain_context(tool_results, response_text) - - # Assess response quality - response_quality = self._assess_response_quality(response_text, tool_results) - - # Decision logic based on dynamic analysis - should_generate = False - - # High confidence that tool chain is complete - if tool_context['confidence_score'] >= 0.7: - should_generate = True - - # Good tool chain with quality response - elif tool_context['confidence_score'] >= 0.5 and response_quality['quality_score'] >= 0.6: - should_generate = True - - # Sequential tools with final computation and good response - elif (tool_context['has_sequential_tools'] and - tool_context['has_final_computation'] and - response_quality['quality_score'] >= 0.4): - should_generate = True - - # Safety fallback - prevent infinite loops (increased threshold) - elif iteration_count >= 7: - should_generate = True - - return should_generate - - def _is_final_answer(self, response_text: str, has_tool_calls: bool, tool_results: List[Any]) -> bool: - """ - Determine if a response is a final answer or intermediate acknowledgment. - - This method provides intelligent differentiation using dynamic analysis - instead of hardcoded patterns. - - Args: - response_text: The text response from the LLM - has_tool_calls: Whether the response contains tool calls - tool_results: Results from executed tools - - Returns: - True if this is a final answer, False if intermediate - """ - if not response_text or not response_text.strip(): - return False - - # If response contains tool calls, it's likely not a final answer - if has_tool_calls: - return False - - # For Ollama, use dynamic analysis instead of hardcoded patterns - if self._is_ollama_provider() and tool_results: - # Analyze tool chain context - tool_context = self._analyze_tool_chain_context(tool_results, response_text) - - # Assess response quality - response_quality = self._assess_response_quality(response_text, tool_results) - - # Dynamic decision based on context and quality - # If we have a complete tool chain with quality response, it's likely final - if (tool_context['confidence_score'] >= 0.6 and - response_quality['quality_score'] >= 0.5): - return True - - # If response is very short and we have tool results, likely intermediate - if response_quality['length'] < 20: - return False - - # If response doesn't reference tool results, likely intermediate - if not response_quality['has_tool_references'] and response_quality['length'] < 80: - return False - - # For other providers, maintain existing behavior - # Substantial content (>10 chars) is considered final - return len(response_text.strip()) > 10 - def _process_stream_delta(self, delta, response_text: str, tool_calls: List[Dict], formatted_tools: Optional[List] = None) -> tuple: """ Process a streaming delta chunk to extract content and tool calls. @@ -1308,19 +1102,17 @@ def get_response( continue # Check if the LLM provided a final answer alongside the tool calls - # Use intelligent differentiation between intermediate and final responses - if self._is_final_answer(response_text, bool(tool_calls), tool_results): + # If response_text contains substantive content, treat it as the final answer + if response_text and response_text.strip() and len(response_text.strip()) > 10: # LLM provided a final answer after tool execution, don't continue final_response_text = response_text.strip() break # Special handling for Ollama to prevent infinite loops - # Use dynamic analysis instead of hardcoded iteration count - if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count): - tool_summary = self._generate_ollama_tool_summary(tool_results, response_text) - if tool_summary: - final_response_text = tool_summary - break + tool_summary = self._generate_ollama_tool_summary(tool_results, response_text) + if tool_summary: + final_response_text = tool_summary + break # Otherwise, continue the loop to check if more tools are needed iteration_count += 1 @@ -2059,19 +1851,17 @@ async def get_response_async( stored_reasoning_content = reasoning_content # Check if the LLM provided a final answer alongside the tool calls - # Use intelligent differentiation between intermediate and final responses - if self._is_final_answer(response_text, bool(tool_calls), tool_results): + # If response_text contains substantive content, treat it as the final answer + if response_text and response_text.strip() and len(response_text.strip()) > 10: # LLM provided a final answer after tool execution, don't continue final_response_text = response_text.strip() break # Special handling for Ollama to prevent infinite loops - # Use dynamic analysis instead of hardcoded iteration count - if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count): - tool_summary = self._generate_ollama_tool_summary(tool_results, response_text) - if tool_summary: - final_response_text = tool_summary - break + tool_summary = self._generate_ollama_tool_summary(tool_results, response_text) + if tool_summary: + final_response_text = tool_summary + break # Continue the loop to check if more tools are needed iteration_count += 1