Skip to content

Commit c816b59

Browse files
refactor: replace hardcoded patterns with dynamic tool usage detection
- Replace hardcoded acknowledgment_patterns and status_patterns with intelligent context analysis
- Implement _analyze_tool_chain_context() for dynamic tool execution pattern detection
- Add _assess_response_quality() for content-based response evaluation
- Replace iteration_count >= 5 threshold with _should_generate_tool_summary() dynamic logic
- Improve tool chain completion detection with confidence scoring
- Increase safety fallback threshold from 5 to 7 iterations
- Maintain backward compatibility with existing providers

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Mervin Praison <[email protected]>
1 parent 9514406 commit c816b59

File tree

1 file changed

+185
-37
lines changed
  • src/praisonai-agents/praisonaiagents/llm

1 file changed

+185
-37
lines changed

src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 185 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -349,13 +349,170 @@ def _format_ollama_tool_result_message(self, function_name: str, tool_result: An
349349
"content": f"The {function_name} function returned: {tool_result_str}"
350350
}
351351

352+
def _analyze_tool_chain_context(self, tool_results: List[Any], response_text: str) -> dict:
353+
"""
354+
Analyze tool execution context to understand the current state of the tool chain.
355+
356+
Returns a context dictionary with confidence scores and patterns.
357+
"""
358+
context = {
359+
'tool_count': len(tool_results) if tool_results else 0,
360+
'has_sequential_tools': False,
361+
'has_final_computation': False,
362+
'has_data_retrieval': False,
363+
'confidence_score': 0.0,
364+
'tool_names': [],
365+
'tool_success_rate': 1.0
366+
}
367+
368+
if not tool_results:
369+
return context
370+
371+
# Extract tool names and analyze patterns
372+
successful_tools = 0
373+
for result in tool_results:
374+
if isinstance(result, dict) and 'function_name' in result:
375+
tool_name = result['function_name'].lower()
376+
context['tool_names'].append(tool_name)
377+
378+
# Check if tool execution was successful
379+
if 'error' not in result or not result.get('error'):
380+
successful_tools += 1
381+
382+
# Analyze tool types for patterns
383+
if any(pattern in tool_name for pattern in ['get', 'fetch', 'search', 'retrieve', 'find']):
384+
context['has_data_retrieval'] = True
385+
386+
if any(pattern in tool_name for pattern in ['calculate', 'compute', 'multiply', 'add', 'sum', 'process']):
387+
context['has_final_computation'] = True
388+
389+
# Calculate success rate
390+
if tool_results:
391+
context['tool_success_rate'] = successful_tools / len(tool_results)
392+
393+
# Detect sequential tool usage patterns
394+
if len(set(context['tool_names'])) > 1:
395+
context['has_sequential_tools'] = True
396+
397+
# Calculate confidence score based on tool chain analysis
398+
confidence = 0.0
399+
400+
# Sequential tools with final computation suggest completion
401+
if context['has_sequential_tools'] and context['has_final_computation']:
402+
confidence += 0.4
403+
404+
# Data retrieval followed by processing
405+
if context['has_data_retrieval'] and context['has_final_computation']:
406+
confidence += 0.3
407+
408+
# High success rate adds confidence
409+
confidence += context['tool_success_rate'] * 0.2
410+
411+
# Multiple tools executed successfully
412+
if context['tool_count'] >= 2 and context['tool_success_rate'] > 0.8:
413+
confidence += 0.1
414+
415+
context['confidence_score'] = min(confidence, 1.0)
416+
return context
417+
418+
def _assess_response_quality(self, response_text: str, tool_results: List[Any]) -> dict:
419+
"""
420+
Assess the quality and completeness of a response based on content analysis.
421+
422+
Returns quality metrics and confidence scores.
423+
"""
424+
quality = {
425+
'length': len(response_text.strip()) if response_text else 0,
426+
'has_tool_references': False,
427+
'has_conclusion_indicators': False,
428+
'contains_results': False,
429+
'quality_score': 0.0
430+
}
431+
432+
if not response_text:
433+
return quality
434+
435+
response_lower = response_text.lower().strip()
436+
437+
# Check for tool result integration
438+
if tool_results:
439+
tool_result_strings = [str(result) for result in tool_results if result]
440+
for tool_result in tool_result_strings:
441+
if tool_result and any(part in response_lower for part in str(tool_result).lower().split() if len(part) > 3):
442+
quality['has_tool_references'] = True
443+
break
444+
445+
# Check for conclusion indicators (dynamic pattern matching)
446+
conclusion_indicators = ['therefore', 'so', 'result', 'answer', 'conclusion', 'final', 'total', 'summary']
447+
quality['has_conclusion_indicators'] = any(indicator in response_lower for indicator in conclusion_indicators)
448+
449+
# Check if response contains actual results/data
450+
if any(char.isdigit() for char in response_text) or '$' in response_text:
451+
quality['contains_results'] = True
452+
453+
# Calculate quality score
454+
score = 0.0
455+
456+
# Response length contributes to quality
457+
if quality['length'] > 20:
458+
score += 0.2
459+
if quality['length'] > 50:
460+
score += 0.1
461+
if quality['length'] > 100:
462+
score += 0.1
463+
464+
# Content quality indicators
465+
if quality['has_tool_references']:
466+
score += 0.3
467+
if quality['has_conclusion_indicators']:
468+
score += 0.2
469+
if quality['contains_results']:
470+
score += 0.1
471+
472+
quality['quality_score'] = min(score, 1.0)
473+
return quality
474+
475+
def _should_generate_tool_summary(self, tool_results: List[Any], response_text: str, iteration_count: int) -> bool:
    """
    Decide dynamically whether a tool summary should be generated.

    Replaces a fixed iteration-count threshold with analysis of the tool
    chain and of the response content; a hard iteration cap remains as a
    safety net against infinite loops.

    Args:
        tool_results: Results from executed tools so far.
        response_text: The latest LLM response text.
        iteration_count: Number of tool-call iterations completed so far.

    Returns:
        True when a summary should be produced, False otherwise.
    """
    chain = self._analyze_tool_chain_context(tool_results, response_text)
    quality = self._assess_response_quality(response_text, tool_results)

    # Strong signal: the tool chain itself looks complete.
    if chain['confidence_score'] >= 0.7:
        return True

    # Moderate chain confidence backed by a good-quality response.
    if chain['confidence_score'] >= 0.5 and quality['quality_score'] >= 0.6:
        return True

    # Sequential tools ending in a computation, with an acceptable response.
    if (chain['has_sequential_tools']
            and chain['has_final_computation']
            and quality['quality_score'] >= 0.4):
        return True

    # Safety fallback: cap iterations to prevent infinite loops.
    return iteration_count >= 7
509+
352510
def _is_final_answer(self, response_text: str, has_tool_calls: bool, tool_results: List[Any]) -> bool:
353511
"""
354512
Determine if a response is a final answer or intermediate acknowledgment.
355513
356-
This method provides intelligent differentiation between:
357-
- Intermediate responses that acknowledge tool execution
358-
- Final responses that contain actual answers to user queries
514+
This method provides intelligent differentiation using dynamic analysis
515+
instead of hardcoded patterns.
359516
360517
Args:
361518
response_text: The text response from the LLM
@@ -368,40 +525,31 @@ def _is_final_answer(self, response_text: str, has_tool_calls: bool, tool_result
368525
if not response_text or not response_text.strip():
369526
return False
370527

371-
response_lower = response_text.lower().strip()
372-
373528
# If response contains tool calls, it's likely not a final answer
374529
if has_tool_calls:
375530
return False
531+
532+
# For Ollama, use dynamic analysis instead of hardcoded patterns
533+
if self._is_ollama_provider() and tool_results:
534+
# Analyze tool chain context
535+
tool_context = self._analyze_tool_chain_context(tool_results, response_text)
376536

377-
# For Ollama, be more conservative about what constitutes a final answer
378-
if self._is_ollama_provider():
379-
# If we have recent tool results, check if this is just acknowledgment
380-
if tool_results:
381-
# Common patterns of tool acknowledgment (not final answers)
382-
acknowledgment_patterns = [
383-
"i'll", "let me", "now i'll", "next i'll", "i need to", "i should",
384-
"executing", "calling", "running", "using the", "based on this",
385-
"now let me", "let me now", "i will now", "proceeding to",
386-
"moving to", "continuing with", "next step", "now that i have",
387-
"tool executed", "function called", "result obtained", "got the result"
388-
]
389-
390-
# Check if response is primarily acknowledgment
391-
if any(pattern in response_lower for pattern in acknowledgment_patterns):
392-
# If it's short and contains acknowledgment patterns, likely intermediate
393-
if len(response_text.strip()) < 50:
394-
return False
395-
396-
# If response is very short and we have tool results, likely intermediate
397-
if len(response_text.strip()) < 30:
398-
return False
399-
400-
# Additional check: if response mainly contains status updates or simple confirmations
401-
status_patterns = ["done", "completed", "finished", "successful", "ok", "ready"]
402-
if (len(response_text.strip()) < 40 and
403-
any(pattern in response_lower for pattern in status_patterns)):
404-
return False
537+
# Assess response quality
538+
response_quality = self._assess_response_quality(response_text, tool_results)
539+
540+
# Dynamic decision based on context and quality
541+
# If we have a complete tool chain with quality response, it's likely final
542+
if (tool_context['confidence_score'] >= 0.6 and
543+
response_quality['quality_score'] >= 0.5):
544+
return True
545+
546+
# If response is very short and we have tool results, likely intermediate
547+
if response_quality['length'] < 20:
548+
return False
549+
550+
# If response doesn't reference tool results, likely intermediate
551+
if not response_quality['has_tool_references'] and response_quality['length'] < 80:
552+
return False
405553

406554
# For other providers, maintain existing behavior
407555
# Substantial content (>10 chars) is considered final
@@ -1167,8 +1315,8 @@ def get_response(
11671315
break
11681316

11691317
# Special handling for Ollama to prevent infinite loops
1170-
# Only generate summary if we're approaching max iterations or stuck in a loop
1171-
if self._is_ollama_provider() and iteration_count >= 5:
1318+
# Use dynamic analysis instead of hardcoded iteration count
1319+
if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count):
11721320
tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
11731321
if tool_summary:
11741322
final_response_text = tool_summary
@@ -1918,8 +2066,8 @@ async def get_response_async(
19182066
break
19192067

19202068
# Special handling for Ollama to prevent infinite loops
1921-
# Only generate summary if we're approaching max iterations or stuck in a loop
1922-
if self._is_ollama_provider() and iteration_count >= 5:
2069+
# Use dynamic analysis instead of hardcoded iteration count
2070+
if self._is_ollama_provider() and self._should_generate_tool_summary(tool_results, response_text, iteration_count):
19232071
tool_summary = self._generate_ollama_tool_summary(tool_results, response_text)
19242072
if tool_summary:
19252073
final_response_text = tool_summary

0 commit comments

Comments
 (0)