Commit 14a1af7

m7md7sien and Copilot authored
Add Agent Response Tool Evaluation Sample with Function Tool (#43966)
* Add Agent Response Tool Evaluation Sample with Function Tool

* Update sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation_with_function_tool.py
  Co-authored-by: Copilot <[email protected]>

* Update sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation_with_function_tool.py
  Co-authored-by: Copilot <[email protected]>

* Address comments

---------

Co-authored-by: Copilot <[email protected]>
1 parent ffe542d commit 14a1af7

2 files changed: 182 additions & 0 deletions

sdk/ai/azure-ai-projects/samples/evaluations/agentic_evaluators/sample_tool_call_success.py

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@
  from azure.identity import DefaultAzureCredential
  from azure.ai.projects import AIProjectClient
+ from openai.types.eval_create_params import DataSourceConfigCustom
  from openai.types.evals.create_eval_jsonl_run_data_source_param import (
      CreateEvalJSONLRunDataSourceParam,
      SourceFileContent,
sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation_with_function_tool.py

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
    This sample demonstrates how to create and run an evaluation for an Azure AI agent response
    using the synchronous AIProjectClient.

    The OpenAI compatible Evals calls in this sample are made using
    the OpenAI client from the `openai` package. See https://platform.openai.com/docs/api-reference
    for more information.

USAGE:
    python sample_agent_response_evaluation_with_function_tool.py

    Before running the sample:

    pip install "azure-ai-projects>=2.0.0b1" azure-identity openai python-dotenv

    Set these environment variables with your own values:
    1) AZURE_AI_PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
       page of your Microsoft Foundry portal.
    2) AZURE_AI_MODEL_DEPLOYMENT_NAME - The deployment name of the AI model, as found under the "Name" column in
       the "Models + endpoints" tab in your Microsoft Foundry project.
"""

import json
import os
import time
from typing import Union
from pprint import pprint
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import PromptAgentDefinition, Tool, FunctionTool
from openai.types.responses.response_input_param import FunctionCallOutput, ResponseInputParam
from openai.types.evals.run_create_response import RunCreateResponse
from openai.types.evals.run_retrieve_response import RunRetrieveResponse

load_dotenv()

model_deployment_name = os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"]

# Define a function tool for the model to use
func_tool = FunctionTool(
    name="get_horoscope",
    parameters={
        "type": "object",
        "properties": {
            "sign": {
                "type": "string",
                "description": "An astrological sign like Taurus or Aquarius",
            },
        },
        "required": ["sign"],
        "additionalProperties": False,
    },
    description="Get today's horoscope for an astrological sign.",
    strict=True,
)
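
# The "parameters" mapping above is a JSON Schema object describing the single string
# argument ("sign") the model supplies when it calls get_horoscope; with strict=True the
# model's arguments are expected to conform to this schema exactly.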

tools: list[Tool] = [func_tool]


def get_horoscope(sign: str) -> str:
    """Generate a horoscope for the given astrological sign."""
    return f"{sign}: Next Tuesday you will befriend a baby otter."

project_client = AIProjectClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)

with project_client:

    openai_client = project_client.get_openai_client()

    agent = project_client.agents.create_version(
        agent_name="MyAgent",
        definition=PromptAgentDefinition(
            model=model_deployment_name,
            instructions="You are a helpful assistant that can use function tools.",
            tools=tools,
        ),
    )
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

    # Prompt the model with tools defined
    response = openai_client.responses.create(
        input="What is my horoscope? I am an Aquarius.",
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
    )
    print(f"Response output: {response.output_text}")

    input_list: ResponseInputParam = []
    # Process function calls
    for item in response.output:
        if item.type == "function_call":
            if item.name == "get_horoscope":
                # Execute the function logic for get_horoscope
                horoscope = get_horoscope(**json.loads(item.arguments))

                # Provide function call results to the model
                input_list.append(
                    FunctionCallOutput(
                        type="function_call_output",
                        call_id=item.call_id,
                        output=json.dumps({"horoscope": horoscope}),
                    )
                )

    print("Final input:")
    print(input_list)

    response = openai_client.responses.create(
        input=input_list,
        previous_response_id=response.id,
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
    )
    print(f"Response output: {response.output_text} (id: {response.id})")

    data_source_config = {"type": "azure_ai_source", "scenario": "responses"}
    testing_criteria = [
        {
            "type": "azure_ai_evaluator",
            "name": "tool_call_accuracy",
            "evaluator_name": "builtin.tool_call_accuracy",
            "initialization_parameters": {
                "deployment_name": f"{model_deployment_name}"
            },
        }
    ]
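
    # Grading is done by the built-in tool_call_accuracy evaluator; initialization_parameters
    # supplies the model deployment it uses (here, the same deployment that backs the agent).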
    eval_object = openai_client.evals.create(
        name="Agent Response Evaluation",
        data_source_config=data_source_config,  # type: ignore
        testing_criteria=testing_criteria,  # type: ignore
    )
    print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")

    data_source = {
        "type": "azure_ai_responses",
        "item_generation_params": {
            "type": "response_retrieval",
            "data_mapping": {"response_id": "{{item.resp_id}}"},
            "source": {"type": "file_content", "content": [{"item": {"resp_id": response.id}}]},
        },
    }
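
    # The run's data source wraps a single item, {"resp_id": <final response id>}; the
    # data_mapping template "{{item.resp_id}}" points the response_retrieval step at that
    # stored response.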

    response_eval_run: Union[RunCreateResponse, RunRetrieveResponse] = openai_client.evals.runs.create(
        eval_id=eval_object.id, name=f"Evaluation Run for Agent {agent.name}", data_source=data_source  # type: ignore
    )
    print(f"Evaluation run created (id: {response_eval_run.id})")

    while response_eval_run.status not in ["completed", "failed"]:
        response_eval_run = openai_client.evals.runs.retrieve(run_id=response_eval_run.id, eval_id=eval_object.id)
        print(f"Waiting for eval run to complete... current status: {response_eval_run.status}")
        time.sleep(5)

    if response_eval_run.status == "completed":
        print("\n✓ Evaluation run completed successfully!")
        print(f"Result Counts: {response_eval_run.result_counts}")

        output_items = list(
            openai_client.evals.runs.output_items.list(run_id=response_eval_run.id, eval_id=eval_object.id)
        )
        print(f"\nOUTPUT ITEMS (Total: {len(output_items)})")
        print(f"Eval Run Report URL: {response_eval_run.report_url}")

        print(f"{'-'*60}")
        pprint(output_items)
        print(f"{'-'*60}")
    else:
        print(f"Eval Run Report URL: {response_eval_run.report_url}")
        print("\n✗ Evaluation run failed.")

    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")

0 commit comments
