test: make tests successfully run from forks (#2203)

anakin87 · web-flow · commit a86fa7ee21a7 · 2025-08-21T13:08:40.000+02:00
* use monkeypatch in Cohere unit tests

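* fix Langfuse: move test_context_cleanup_after_nested_failures from test_tracer.py to test_tracing.py as an integration test, skipped when LANGFUSE_SECRET_KEY / LANGFUSE_PUBLIC_KEY are not set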

* set NVIDIA_API_KEY via monkeypatch in NVIDIA NimBackend unit tests

* fix

* skip AWS authentication when PRs are from dependabot
diff --git a/.github/workflows/amazon_bedrock.yml b/.github/workflows/amazon_bedrock.yml
@@ -65,10 +65,10 @@ jobs:
       - name: Run unit tests
         run: hatch run test:unit
 
-      # Do not authenticate on pull requests from forks
+      # Do not authenticate on PRs from forks and on PRs created by dependabot
       - name: AWS authentication
         id: aws-auth
-        if: github.event.pull_request.head.repo.full_name == github.repository
+        if: github.event.pull_request.head.repo.full_name == github.repository && !startsWith(github.event.pull_request.head.ref, 'dependabot/')
         uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df
         with:
           aws-region: ${{ env.AWS_REGION }}
diff --git a/integrations/cohere/tests/test_document_embedder.py b/integrations/cohere/tests/test_document_embedder.py
@@ -15,7 +15,8 @@
 
 
 class TestCohereDocumentEmbedder:
-    def test_init_default(self):
+    def test_init_default(self, monkeypatch):
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         embedder = CohereDocumentEmbedder()
         assert embedder.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
         assert embedder.model == "embed-english-v2.0"
@@ -54,7 +55,8 @@ def test_init_with_parameters(self):
         assert embedder.embedding_separator == "-"
         assert embedder.embedding_type == EmbeddingTypes.FLOAT
 
-    def test_to_dict(self):
+    def test_to_dict(self, monkeypatch):
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         embedder_component = CohereDocumentEmbedder()
         component_dict = embedder_component.to_dict()
         assert component_dict == {
@@ -74,7 +76,8 @@ def test_to_dict(self):
             },
         }
 
-    def test_to_dict_with_custom_init_parameters(self):
+    def test_to_dict_with_custom_init_parameters(self, monkeypatch):
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         embedder_component = CohereDocumentEmbedder(
             api_key=Secret.from_env_var("ENV_VAR", strict=False),
             model="embed-multilingual-v2.0",
@@ -106,7 +109,8 @@ def test_to_dict_with_custom_init_parameters(self):
             },
         }
 
-    def test_from_dict(self):
+    def test_from_dict(self, monkeypatch):
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         component_dict = {
             "type": "haystack_integrations.components.embedders.cohere.document_embedder.CohereDocumentEmbedder",
             "init_parameters": {
diff --git a/integrations/cohere/tests/test_text_embedder.py b/integrations/cohere/tests/test_text_embedder.py
@@ -13,10 +13,11 @@
 
 
 class TestCohereTextEmbedder:
-    def test_init_default(self):
+    def test_init_default(self, monkeypatch):
         """
         Test default initialization parameters for CohereTextEmbedder.
         """
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         embedder = CohereTextEmbedder()
 
         assert embedder.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
@@ -46,10 +47,11 @@ def test_init_with_parameters(self):
         assert embedder.timeout == 60
         assert embedder.embedding_type == EmbeddingTypes.FLOAT
 
-    def test_to_dict(self):
+    def test_to_dict(self, monkeypatch):
         """
         Test serialization of this component to a dictionary, using default initialization parameters.
         """
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         embedder_component = CohereTextEmbedder()
         component_dict = embedder_component.to_dict()
         assert component_dict == {
@@ -65,10 +67,11 @@ def test_to_dict(self):
             },
         }
 
-    def test_to_dict_with_custom_init_parameters(self):
+    def test_to_dict_with_custom_init_parameters(self, monkeypatch):
         """
         Test serialization of this component to a dictionary, using custom initialization parameters.
         """
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         embedder_component = CohereTextEmbedder(
             api_key=Secret.from_env_var("ENV_VAR", strict=False),
             model="embed-multilingual-v2.0",
@@ -92,7 +95,8 @@ def test_to_dict_with_custom_init_parameters(self):
             },
         }
 
-    def test_from_dict(self):
+    def test_from_dict(self, monkeypatch):
+        monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
         component_dict = {
             "type": "haystack_integrations.components.embedders.cohere.text_embedder.CohereTextEmbedder",
             "init_parameters": {
diff --git a/integrations/langfuse/tests/test_tracer.py b/integrations/langfuse/tests/test_tracer.py
@@ -3,17 +3,14 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import datetime
-import json
 import logging
 import sys
 from typing import Optional
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from haystack import Pipeline, component
 from haystack.dataclasses import ChatMessage, ToolCall
 
-from haystack_integrations.components.connectors.langfuse import LangfuseConnector
 from haystack_integrations.tracing.langfuse.tracer import (
     _COMPONENT_OUTPUT_KEY, DefaultSpanHandler, LangfuseSpan, LangfuseTracer,
     SpanContext)
@@ -403,58 +400,3 @@ def test_init_with_tracing_disabled(self, monkeypatch, caplog):
 
             LangfuseTracer(tracer=MockTracer(), name="Haystack", public=False)
             assert "tracing is disabled" in caplog.text
-
-    def test_context_cleanup_after_nested_failures(self):
-        """
-        Test that tracer context is properly cleaned up even when nested operations fail.
-
-        This test addresses a critical bug where failing nested operations (like inner pipelines)
-        could corrupt the tracing context, leaving stale spans that affect subsequent operations.
-        The fix ensures proper cleanup through try/finally blocks.
-
-        Before the fix: context would retain spans after failures (length > 0)
-        After the fix: context is always cleaned up (length == 0)
-        """
-
-
-        @component
-        class FailingParser:
-            @component.output_types(result=str)
-            def run(self, data: str):
-                # This will fail with ValueError when data is not valid JSON
-                parsed = json.loads(data)
-                return {"result": parsed["key"]}
-
-        @component
-        class ComponentWithNestedPipeline:
-            def __init__(self):
-                # This simulates IntentClassifier's internal pipeline
-                self.internal_pipeline = Pipeline()
-                self.internal_pipeline.add_component("parser", FailingParser())
-
-            @component.output_types(result=str)
-            def run(self, input_data: str):
-                # Run nested pipeline - this is where corruption occurs
-                result = self.internal_pipeline.run({"parser": {"data": input_data}})
-                return {"result": result["parser"]["result"]}
-
-        tracer = LangfuseConnector("test")
-
-        main_pipeline = Pipeline()
-        main_pipeline.add_component("nested_component", ComponentWithNestedPipeline())
-        main_pipeline.add_component("tracer", tracer)
-
-        # Test 1: First run will fail and should clean up context
-        try:
-            main_pipeline.run({"nested_component": {"input_data": "invalid json"}})
-        except Exception:
-            pass  # Expected to fail
-
-        # Critical assertion: context should be empty after failed operation
-        assert len(tracer.tracer._context) == 0
-
-        # Test 2: Second run should work normally with clean context
-        main_pipeline.run({"nested_component": {"input_data": '{"key": "valid"}'}})
-        
-        # Critical assertion: context should be empty after successful operation
-        assert len(tracer.tracer._context) == 0
diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py
@@ -6,6 +6,7 @@
 import time
 from typing import Any, Dict, List
 from urllib.parse import urlparse
+import json
 
 import pytest
 import requests
@@ -189,3 +190,67 @@ def run(self, messages: List[ChatMessage]) -> Dict[str, Any]:
     component_names = [key for obs in haystack_pipeline_run_observations for key in obs["input"].keys()]
     assert "prompt_builder" in component_names
     assert "llm" in component_names
+
+@pytest.mark.skipif(
+    not all(
+        [
+            os.environ.get("LANGFUSE_SECRET_KEY"),
+            os.environ.get("LANGFUSE_PUBLIC_KEY"),
+        ]
+    ),
+    reason="Missing required environment variables: LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY",
+)
+@pytest.mark.integration
+def test_context_cleanup_after_nested_failures():
+    """
+    Test that tracer context is properly cleaned up even when nested operations fail.
+
+    This test addresses a critical bug where failing nested operations (like inner pipelines)
+    could corrupt the tracing context, leaving stale spans that affect subsequent operations.
+    The fix ensures proper cleanup through try/finally blocks.
+
+    Before the fix: context would retain spans after failures (length > 0)
+    After the fix: context is always cleaned up (length == 0)
+    """
+
+    @component
+    class FailingParser:
+        @component.output_types(result=str)
+        def run(self, data: str):
+            # This will fail with ValueError when data is not valid JSON
+            parsed = json.loads(data)
+            return {"result": parsed["key"]}
+
+    @component
+    class ComponentWithNestedPipeline:
+        def __init__(self):
+            # This simulates IntentClassifier's internal pipeline
+            self.internal_pipeline = Pipeline()
+            self.internal_pipeline.add_component("parser", FailingParser())
+
+        @component.output_types(result=str)
+        def run(self, input_data: str):
+            # Run nested pipeline - this is where corruption occurs
+            result = self.internal_pipeline.run({"parser": {"data": input_data}})
+            return {"result": result["parser"]["result"]}
+
+    tracer = LangfuseConnector("test")
+
+    main_pipeline = Pipeline()
+    main_pipeline.add_component("nested_component", ComponentWithNestedPipeline())
+    main_pipeline.add_component("tracer", tracer)
+
+    # Test 1: First run will fail and should clean up context
+    try:
+        main_pipeline.run({"nested_component": {"input_data": "invalid json"}})
+    except Exception:
+        pass  # Expected to fail
+
+    # Critical assertion: context should be empty after failed operation
+    assert len(tracer.tracer._context) == 0
+
+    # Test 2: Second run should work normally with clean context
+    main_pipeline.run({"nested_component": {"input_data": '{"key": "valid"}'}})
+    
+    # Critical assertion: context should be empty after successful operation
+    assert len(tracer.tracer._context) == 0    
diff --git a/integrations/nvidia/tests/test_nim_backend.py b/integrations/nvidia/tests/test_nim_backend.py
@@ -86,11 +86,13 @@ def test_init_default(self, monkeypatch):
         assert backend.session.headers["authorization"] == "Bearer fake-api-key"
         assert backend.timeout == REQUEST_TIMEOUT
 
-    def test_init_with_client_enum(self):
+    def test_init_with_client_enum(self, monkeypatch):
+        monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
         backend = NimBackend(model="custom-model", api_url="http://localhost:8000", client=Client.NVIDIA_TEXT_EMBEDDER)
         assert backend.client == Client.NVIDIA_TEXT_EMBEDDER
 
-    def test_init_without_client(self):
+    def test_init_without_client(self, monkeypatch):
+        monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
         backend = NimBackend(model="custom-model", api_url="http://localhost:8000")
         assert backend.client is None
         assert backend.model_type is None
@@ -145,7 +147,8 @@ def test_init_with_incompatible_client_raises_error(self, monkeypatch):
                 client="NvidiaGenerator",  # chat client
             )
 
-    def test_init_with_non_hosted_model(self):
+    def test_init_with_non_hosted_model(self, monkeypatch):
+        monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
         backend = NimBackend(model="unknown-model", api_url="http://localhost:8000", client="NvidiaTextEmbedder")
 
         # validation is skipped for non-hosted models