
Commit 239e6c0

Fix GEPA usage tracking with tuple outputs (#8739)
* Fix GEPA usage tracking with tuple outputs

  - Fix AttributeError when track_usage=True and GEPA returns a tuple
  - Module.__call__ now handles both prediction objects and tuples
  - Add regression test for GEPA compile with usage tracking
  - Resolves the hanging/infinite loop when GEPA patches return tuples

  Fixes the issue where GEPA bootstrap tracing returns (prediction, trace) tuples but usage tracking expected only prediction objects.

* Rework test to simplify, make it self-contained, and make the fix more readable

* Revert other code that had been reformatted
1 parent 0bb0d93 commit 239e6c0
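
For context, a minimal standalone sketch of the failure mode described above (illustrative only; the Prediction stand-in below is not dspy's actual class): an optimizer temporarily patches forward() to return a (prediction, trace) tuple, and the pre-fix usage-tracking path then calls set_lm_usage on the tuple itself.

# Minimal standalone sketch of the pre-fix failure mode (illustrative only;
# this stand-in Prediction class is not dspy's real Prediction).

class Prediction:
    def set_lm_usage(self, usage):
        self._lm_usage = usage


def patched_forward():
    # GEPA-style bootstrap tracing wraps forward() so it returns (prediction, trace).
    return Prediction(), ["trace-step-1"]


output = patched_forward()
try:
    # Pre-fix code path: assumes output is always a Prediction.
    output.set_lm_usage({"total_tokens": 42})
except AttributeError as exc:
    print(exc)  # 'tuple' object has no attribute 'set_lm_usage'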

2 files changed: +72 / -1 lines changed

dspy/primitives/module.py

Lines changed: 15 additions & 1 deletion
@@ -72,7 +72,21 @@ def __call__(self, *args, **kwargs) -> Prediction:
         if settings.track_usage and thread_local_overrides.get().get("usage_tracker") is None:
             with track_usage() as usage_tracker:
                 output = self.forward(*args, **kwargs)
-            output.set_lm_usage(usage_tracker.get_total_tokens())
+            tokens = usage_tracker.get_total_tokens()
+
+            # Some optimizers (e.g., GEPA bootstrap tracing) temporarily patch
+            # module.forward to return a tuple: (prediction, trace).
+            # When usage tracking is enabled, ensure we attach usage to the
+            # prediction object if present.
+            prediction_in_output = None
+            if isinstance(output, Prediction):
+                prediction_in_output = output
+            elif isinstance(output, tuple) and len(output) > 0 and isinstance(output[0], Prediction):
+                prediction_in_output = output[0]
+            if not prediction_in_output:
+                raise ValueError("No prediction object found in output to call set_lm_usage on.")
+
+            prediction_in_output.set_lm_usage(tokens)
             return output
 
         return self.forward(*args, **kwargs)
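
Read in isolation, the unwrapping logic added above accepts two output shapes: a bare prediction, or a (prediction, trace) tuple. A standalone restatement as a small helper (illustrative only; attach_usage and the stand-in Prediction class below are not part of dspy):

# Standalone restatement of the unwrapping logic in the diff above, with a
# stand-in Prediction class for illustration; dspy's real class is used in module.py.

class Prediction:
    def set_lm_usage(self, usage):
        self.lm_usage = usage


def attach_usage(output, tokens):
    prediction_in_output = None
    if isinstance(output, Prediction):
        prediction_in_output = output
    elif isinstance(output, tuple) and len(output) > 0 and isinstance(output[0], Prediction):
        prediction_in_output = output[0]
    if not prediction_in_output:
        raise ValueError("No prediction object found in output to call set_lm_usage on.")
    prediction_in_output.set_lm_usage(tokens)
    return output


pred = Prediction()
attach_usage(pred, {"total_tokens": 10})                # plain Prediction
attach_usage((pred, ["trace"]), {"total_tokens": 10})   # (prediction, trace) tuple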

tests/teleprompt/test_gepa.py

Lines changed: 57 additions & 0 deletions
@@ -1,11 +1,14 @@
 import json
+import threading
+from typing import Any
 
 import pytest
 
 import dspy
 import dspy.clients
 from dspy import Example
 from dspy.predict import Predict
+from dspy.utils.dummies import DummyLM
 
 
 class SimpleModule(dspy.Module):
@@ -66,3 +69,57 @@ def test_metric_requires_feedback_signature():
     reflection_lm = DictDummyLM([])
     with pytest.raises(TypeError):
         dspy.GEPA(metric=bad_metric, reflection_lm=reflection_lm, max_metric_calls=1)
+
+
+def any_metric(
+    gold: dspy.Example,
+    pred: dspy.Prediction,
+    trace: Any = None,
+    pred_name: str | None = None,
+    pred_trace: Any = None,
+) -> float:
+    """
+    For this test, we only care that the program runs, not the score.
+    """
+    return 0.0  # Just returns 0.0; doesn't access any prediction attributes.
+
+
+def test_gepa_compile_with_track_usage_no_tuple_error(caplog):
+    """
+    GEPA.compile should not log the tuple-usage error when track_usage=True, and it should complete without hanging.
+    Before the fix, compile would hang and/or repeatedly log "'tuple' object has no attribute 'set_lm_usage'".
+    """
+    student = dspy.Predict("question -> answer")
+    trainset = [dspy.Example(question="What is 2+2?", answer="4").with_inputs("question")]
+
+    task_lm = DummyLM([{"answer": "mock answer 1"}])
+    reflection_lm = DummyLM([{"new_instruction": "Something new."}])
+
+    compiled_container: dict[str, Any] = {}
+    exc_container: dict[str, BaseException] = {}
+
+    def run_compile():
+        try:
+            with dspy.context(lm=task_lm, track_usage=True):
+                optimizer = dspy.GEPA(metric=any_metric, reflection_lm=reflection_lm, max_metric_calls=3)
+                compiled_container["prog"] = optimizer.compile(student, trainset=trainset, valset=trainset)
+        except BaseException as e:
+            exc_container["e"] = e
+
+    t = threading.Thread(target=run_compile, daemon=True)
+    t.start()
+    t.join(timeout=1.0)
+
+    # Assert compile did not hang (pre-fix behavior would time out here)
+    assert not t.is_alive(), "GEPA.compile did not complete within timeout (likely pre-fix behavior)."
+
+    # Assert the tuple-usage error is no longer logged
+    assert "'tuple' object has no attribute 'set_lm_usage'" not in caplog.text
+
+    # If any exception occurred, fail explicitly
+    if "e" in exc_container:
+        pytest.fail(f"GEPA.compile raised unexpectedly: {exc_container['e']}")
+
+    # No timeout, no exception -> the compiled program must exist
+    if "prog" not in compiled_container:
+        pytest.fail("GEPA.compile did not return a program (likely pre-fix behavior).")
