Skip to content

Commit b32fcc4

Browse files
ankursharmascopybara-github
authored andcommitted
fix: Update create_eval_set API to return the created EvalSet and it route
PiperOrigin-RevId: 797464408
1 parent 54ed079 commit b32fcc4

File tree

8 files changed

+94
-30
lines changed

8 files changed

+94
-30
lines changed

src/google/adk/cli/adk_web_server.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from pydantic import Field
4646
from pydantic import ValidationError
4747
from starlette.types import Lifespan
48+
from typing_extensions import deprecated
4849
from typing_extensions import override
4950
from watchdog.observers import Observer
5051

@@ -66,6 +67,7 @@
6667
from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
6768
from ..evaluation.eval_metrics import MetricInfo
6869
from ..evaluation.eval_result import EvalSetResult
70+
from ..evaluation.eval_set import EvalSet
6971
from ..evaluation.eval_set_results_manager import EvalSetResultsManager
7072
from ..evaluation.eval_sets_manager import EvalSetsManager
7173
from ..events.event import Event
@@ -197,6 +199,10 @@ class GetEventGraphResult(common.BaseModel):
197199
dot_src: str
198200

199201

202+
class CreateEvalSetRequest(common.BaseModel):
203+
eval_set_id: str
204+
205+
200206
class AdkWebServer:
201207
"""Helper class for setting up and running the ADK web server on FastAPI.
202208
@@ -466,23 +472,42 @@ async def delete_session(
466472
)
467473

468474
@app.post(
469-
"/apps/{app_name}/eval_sets/{eval_set_id}",
475+
"/apps/{app_name}/eval-sets",
470476
response_model_exclude_none=True,
471477
tags=[TAG_EVALUATION],
472478
)
473-
async def create_eval_set(
474-
app_name: str,
475-
eval_set_id: str,
476-
):
477-
"""Creates an eval set, given the id."""
479+
async def create_eval_set_v2(
480+
app_name: str, create_eval_set_request: CreateEvalSetRequest
481+
) -> EvalSet:
478482
try:
479-
self.eval_sets_manager.create_eval_set(app_name, eval_set_id)
483+
return self.eval_sets_manager.create_eval_set(
484+
app_name=app_name, eval_set_id=create_eval_set_request.eval_set_id
485+
)
480486
except ValueError as ve:
481487
raise HTTPException(
482488
status_code=400,
483489
detail=str(ve),
484490
) from ve
485491

492+
@deprecated(
493+
"Please use create_eval_set_v2 instead. This will be removed in future"
494+
" releases."
495+
)
496+
@app.post(
497+
"/apps/{app_name}/eval_sets/{eval_set_id}",
498+
response_model_exclude_none=True,
499+
tags=[TAG_EVALUATION],
500+
)
501+
async def create_eval_set(
502+
app_name: str,
503+
eval_set_id: str,
504+
):
505+
"""Creates an eval set, given the id."""
506+
create_eval_set_v2(
507+
app_name=app_name,
508+
create_eval_set_request=CreateEvalSetRequest(eval_set_id=eval_set_id),
509+
)
510+
486511
@app.get(
487512
"/apps/{app_name}/eval_sets",
488513
response_model_exclude_none=True,

src/google/adk/evaluation/eval_sets_manager.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from abc import abstractmethod
1919
from typing import Optional
2020

21-
from ..errors.not_found_error import NotFoundError
2221
from .eval_case import EvalCase
2322
from .eval_set import EvalSet
2423

@@ -31,8 +30,12 @@ def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
3130
"""Returns an EvalSet identified by an app_name and eval_set_id."""
3231

3332
@abstractmethod
34-
def create_eval_set(self, app_name: str, eval_set_id: str):
35-
"""Creates an empty EvalSet given the app_name and eval_set_id."""
33+
def create_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
34+
"""Creates and returns an empty EvalSet given the app_name and eval_set_id.
35+
36+
Raises:
37+
ValueError: If eval set id is not valid or an eval set already exists.
38+
"""
3639

3740
@abstractmethod
3841
def list_eval_sets(self, app_name: str) -> list[str]:

src/google/adk/evaluation/gcs_eval_sets_manager.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,12 @@ def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
9999
return self._load_eval_set_from_blob(eval_set_blob_name)
100100

101101
@override
102-
def create_eval_set(self, app_name: str, eval_set_id: str):
103-
"""Creates an empty EvalSet and saves it to GCS."""
102+
def create_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
103+
"""Creates an empty EvalSet and saves it to GCS.
104+
105+
Raises:
106+
ValueError: If eval set id is not valid or an eval set already exists.
107+
"""
104108
self._validate_id(id_name="Eval Set Id", id_value=eval_set_id)
105109
new_eval_set_blob_name = self._get_eval_set_blob_name(app_name, eval_set_id)
106110
if self.bucket.blob(new_eval_set_blob_name).exists():
@@ -115,6 +119,7 @@ def create_eval_set(self, app_name: str, eval_set_id: str):
115119
creation_timestamp=time.time(),
116120
)
117121
self._write_eval_set_to_blob(new_eval_set_blob_name, new_eval_set)
122+
return new_eval_set
118123

119124
@override
120125
def list_eval_sets(self, app_name: str) -> list[str]:

src/google/adk/evaluation/in_memory_eval_sets_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def create_eval_set(self, app_name: str, eval_set_id: str):
6565
)
6666
self._eval_sets[app_name][eval_set_id] = new_eval_set
6767
self._eval_cases[app_name][eval_set_id] = {}
68+
return new_eval_set
6869

6970
@override
7071
def list_eval_sets(self, app_name: str) -> list[str]:

src/google/adk/evaluation/local_eval_sets_manager.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,12 @@ def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
205205
return None
206206

207207
@override
208-
def create_eval_set(self, app_name: str, eval_set_id: str):
209-
"""Creates an empty EvalSet given the app_name and eval_set_id."""
208+
def create_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
209+
"""Creates and returns an empty EvalSet given the app_name and eval_set_id.
210+
211+
Raises:
212+
ValueError: If eval set id is not valid or an eval set already exists.
213+
"""
210214
self._validate_id(id_name="Eval Set Id", id_value=eval_set_id)
211215

212216
# Define the file path
@@ -224,6 +228,11 @@ def create_eval_set(self, app_name: str, eval_set_id: str):
224228
creation_timestamp=time.time(),
225229
)
226230
self._write_eval_set_to_path(new_eval_set_path, new_eval_set)
231+
return new_eval_set
232+
233+
raise ValueError(
234+
f"EvalSet {eval_set_id} already exists for app {app_name}."
235+
)
227236

228237
@override
229238
def list_eval_sets(self, app_name: str) -> list[str]:

tests/unittests/evaluation/test_gcs_eval_sets_manager.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,17 +79,21 @@ def test_gcs_eval_sets_manager_create_eval_set_success(
7979
app_name, eval_set_id
8080
)
8181

82-
gcs_eval_sets_manager.create_eval_set(app_name, eval_set_id)
82+
created_eval_set = gcs_eval_sets_manager.create_eval_set(
83+
app_name, eval_set_id
84+
)
8385

86+
expected_eval_set = EvalSet(
87+
eval_set_id=eval_set_id,
88+
name=eval_set_id,
89+
eval_cases=[],
90+
creation_timestamp=mocked_time,
91+
)
8492
mock_write_eval_set_to_blob.assert_called_once_with(
8593
eval_set_blob_name,
86-
EvalSet(
87-
eval_set_id=eval_set_id,
88-
name=eval_set_id,
89-
eval_cases=[],
90-
creation_timestamp=mocked_time,
91-
),
94+
expected_eval_set,
9295
)
96+
assert created_eval_set == expected_eval_set
9397

9498
def test_gcs_eval_sets_manager_create_eval_set_invalid_id(
9599
self, gcs_eval_sets_manager

tests/unittests/evaluation/test_in_memory_eval_sets_manager.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ def eval_case_id():
4141

4242

4343
def test_create_eval_set(manager, app_name, eval_set_id):
44-
manager.create_eval_set(app_name, eval_set_id)
45-
eval_set = manager.get_eval_set(app_name, eval_set_id)
44+
eval_set = manager.create_eval_set(app_name, eval_set_id)
4645
assert eval_set is not None
4746
assert eval_set.eval_set_id == eval_set_id
4847
assert eval_set.eval_cases == []

tests/unittests/evaluation/test_local_eval_sets_manager.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -370,16 +370,21 @@ def test_local_eval_sets_manager_create_eval_set_success(
370370
eval_set_id + _EVAL_SET_FILE_EXTENSION,
371371
)
372372

373-
local_eval_sets_manager.create_eval_set(app_name, eval_set_id)
373+
created_eval_set = local_eval_sets_manager.create_eval_set(
374+
app_name, eval_set_id
375+
)
376+
377+
expected_eval_set = EvalSet(
378+
eval_set_id=eval_set_id,
379+
name=eval_set_id,
380+
eval_cases=[],
381+
creation_timestamp=mocked_time,
382+
)
374383
mock_write_eval_set_to_path.assert_called_once_with(
375384
eval_set_file_path,
376-
EvalSet(
377-
eval_set_id=eval_set_id,
378-
name=eval_set_id,
379-
eval_cases=[],
380-
creation_timestamp=mocked_time,
381-
),
385+
expected_eval_set,
382386
)
387+
assert created_eval_set == expected_eval_set
383388

384389
def test_local_eval_sets_manager_create_eval_set_invalid_id(
385390
self, local_eval_sets_manager
@@ -390,6 +395,19 @@ def test_local_eval_sets_manager_create_eval_set_invalid_id(
390395
with pytest.raises(ValueError, match="Invalid Eval Set Id"):
391396
local_eval_sets_manager.create_eval_set(app_name, eval_set_id)
392397

398+
def test_local_eval_sets_manager_create_eval_set_already_exists(
399+
self, local_eval_sets_manager, mocker
400+
):
401+
app_name = "test_app"
402+
eval_set_id = "existing_eval_set_id"
403+
mocker.patch("os.path.exists", return_value=True)
404+
405+
with pytest.raises(
406+
ValueError,
407+
match="EvalSet existing_eval_set_id already exists for app test_app.",
408+
):
409+
local_eval_sets_manager.create_eval_set(app_name, eval_set_id)
410+
393411
def test_local_eval_sets_manager_list_eval_sets_success(
394412
self, local_eval_sets_manager, mocker
395413
):

0 commit comments

Comments
 (0)