|
1 | 1 | import json
|
| 2 | +import threading |
| 3 | +from typing import Any |
2 | 4 |
|
3 | 5 | import pytest
|
4 | 6 |
|
5 | 7 | import dspy
|
6 | 8 | import dspy.clients
|
7 | 9 | from dspy import Example
|
8 | 10 | from dspy.predict import Predict
|
| 11 | +from dspy.utils.dummies import DummyLM |
9 | 12 |
|
10 | 13 |
|
11 | 14 | class SimpleModule(dspy.Module):
|
@@ -66,3 +69,57 @@ def test_metric_requires_feedback_signature():
|
66 | 69 | reflection_lm = DictDummyLM([])
|
67 | 70 | with pytest.raises(TypeError):
|
68 | 71 | dspy.GEPA(metric=bad_metric, reflection_lm=reflection_lm, max_metric_calls=1)
|
| 72 | + |
| 73 | + |
def any_metric(
    gold: dspy.Example,
    pred: dspy.Prediction,
    trace: Any = None,
    pred_name: str | None = None,
    pred_trace: Any = None,
) -> float:
    """Placeholder metric that scores every prediction as 0.0.

    The test that uses this metric only checks that the program runs; the
    score itself is irrelevant, so none of the arguments are inspected.
    """
    constant_score = 0.0
    return constant_score
| 85 | + |
| 86 | + |
def test_gepa_compile_with_track_usage_no_tuple_error(caplog):
    """
    GEPA.compile should complete without hanging and without logging a tuple-usage error
    when track_usage=True.

    Before, compile would hang and/or log "'tuple' object has no attribute 'set_lm_usage'" repeatedly.
    """
    student = dspy.Predict("question -> answer")
    trainset = [dspy.Example(question="What is 2+2?", answer="4").with_inputs("question")]

    task_lm = DummyLM([{"answer": "mock answer 1"}])
    reflection_lm = DummyLM([{"new_instruction": "Something new."}])

    # Results are handed back from the worker thread through these containers.
    compiled_container: dict[str, Any] = {}
    exc_container: dict[str, BaseException] = {}

    def run_compile():
        # Runs in a worker thread so the test can detect a hang via join(timeout=...).
        try:
            with dspy.context(lm=task_lm, track_usage=True):
                optimizer = dspy.GEPA(metric=any_metric, reflection_lm=reflection_lm, max_metric_calls=3)
                compiled_container["prog"] = optimizer.compile(student, trainset=trainset, valset=trainset)
        except BaseException as e:
            # Deliberately broad: anything raised in the worker must be reported by the
            # main thread below instead of being lost with the thread.
            exc_container["e"] = e

    # daemon=True so a hung worker cannot keep the interpreter alive after the test fails.
    t = threading.Thread(target=run_compile, daemon=True)
    t.start()
    t.join(timeout=1.0)

    # Assert compile did not hang (pre-fix behavior would time out here)
    assert not t.is_alive(), "GEPA.compile did not complete within timeout (likely pre-fix behavior)."

    # Assert no tuple-usage error is logged anymore
    assert "'tuple' object has no attribute 'set_lm_usage'" not in caplog.text

    # If any exception occurred, fail explicitly; !r keeps the exception type visible
    # even when the exception carries no message.
    if "e" in exc_container:
        pytest.fail(f"GEPA.compile raised unexpectedly: {exc_container['e']!r}")

    # No timeout, no exception -> so the program must exist
    if "prog" not in compiled_container:
        pytest.fail("GEPA.compile did not return a program (likely pre-fix behavior).")
0 commit comments