 from google.adk.evaluation.eval_result import EvalCaseResult
 from google.adk.evaluation.eval_set import EvalCase
 from google.adk.evaluation.eval_set import EvalSet
+from google.adk.evaluation.eval_set_results_manager import EvalSetResultsManager
 from google.adk.evaluation.eval_sets_manager import EvalSetsManager
 from google.adk.evaluation.evaluator import EvalStatus
 from google.adk.evaluation.evaluator import EvaluationResult
@@ -51,13 +52,21 @@ def dummy_agent():
 
 
 @pytest.fixture
-def eval_service(dummy_agent, mock_eval_sets_manager):
+def mock_eval_set_results_manager():
+  return mock.create_autospec(EvalSetResultsManager)
+
+
+@pytest.fixture
+def eval_service(
+    dummy_agent, mock_eval_sets_manager, mock_eval_set_results_manager
+):
   DEFAULT_METRIC_EVALUATOR_REGISTRY.register_evaluator(
       metric_name="fake_metric", evaluator=FakeEvaluator
   )
   return LocalEvalService(
       root_agent=dummy_agent,
       eval_sets_manager=mock_eval_sets_manager,
+      eval_set_results_manager=mock_eval_set_results_manager,
   )
 
 
@@ -90,7 +99,9 @@ def evaluate_invocations(
 
 @pytest.mark.asyncio
 async def test_perform_inference_success(
-    eval_service, dummy_agent, mock_eval_sets_manager
+    eval_service,
+    dummy_agent,
+    mock_eval_sets_manager,
 ):
   eval_set = EvalSet(
       eval_set_id="test_eval_set",
@@ -127,7 +138,9 @@ async def test_perform_inference_success(
 
 @pytest.mark.asyncio
 async def test_perform_inference_with_case_ids(
-    eval_service, dummy_agent, mock_eval_sets_manager
+    eval_service,
+    dummy_agent,
+    mock_eval_sets_manager,
 ):
   eval_set = EvalSet(
       eval_set_id="test_eval_set",
@@ -172,7 +185,8 @@ async def test_perform_inference_with_case_ids(
 
 @pytest.mark.asyncio
 async def test_perform_inference_eval_set_not_found(
-    eval_service, mock_eval_sets_manager
+    eval_service,
+    mock_eval_sets_manager,
 ):
   mock_eval_sets_manager.get_eval_set.return_value = None
 
@@ -188,7 +202,9 @@ async def test_perform_inference_eval_set_not_found(
 
 
 @pytest.mark.asyncio
-async def test_evaluate_success(eval_service, mock_eval_sets_manager):
+async def test_evaluate_success(
+    eval_service, mock_eval_sets_manager, mock_eval_set_results_manager
+):
   inference_results = [
       InferenceResult(
           app_name="test_app",
@@ -224,11 +240,13 @@ async def test_evaluate_success(eval_service, mock_eval_sets_manager):
   assert isinstance(results[0], EvalCaseResult)
   assert isinstance(results[1], EvalCaseResult)
   assert mock_eval_sets_manager.get_eval_case.call_count == 2
+  assert mock_eval_set_results_manager.save_eval_set_result.call_count == 2
 
 
 @pytest.mark.asyncio
 async def test_evaluate_eval_case_not_found(
-    eval_service, mock_eval_sets_manager
+    eval_service,
+    mock_eval_sets_manager,
 ):
   inference_results = [
       InferenceResult(
@@ -256,7 +274,7 @@ async def test_evaluate_eval_case_not_found(
 
 @pytest.mark.asyncio
 async def test_evaluate_single_inference_result(
-    eval_service, mock_eval_sets_manager
+    eval_service, mock_eval_sets_manager, mock_eval_set_results_manager
 ):
   invocation = Invocation(
       user_content=genai_types.Content(
@@ -289,7 +307,7 @@ async def test_evaluate_single_inference_result(
   mock_eval_case.session_input = None
   mock_eval_sets_manager.get_eval_case.return_value = mock_eval_case
 
-  result = await eval_service._evaluate_single_inference_result(
+  _, result = await eval_service._evaluate_single_inference_result(
       inference_result=inference_result, evaluate_config=evaluate_config
   )
 