# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
    This sample demonstrates how to create and run an evaluation for an Azure AI agent response
    using the synchronous AIProjectClient.

    The OpenAI-compatible Evals calls in this sample are made using the OpenAI client from the
    `openai` package. See https://platform.openai.com/docs/api-reference for more information.

USAGE:
    python sample_agent_response_evaluation_with_function_tool.py

    Before running the sample:

    pip install "azure-ai-projects>=2.0.0b1" azure-identity openai python-dotenv

    Set these environment variables with your own values:
    1) AZURE_AI_PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
       page of your Microsoft Foundry portal.
    2) AZURE_AI_MODEL_DEPLOYMENT_NAME - The deployment name of the AI model, as found under the "Name" column in
       the "Models + endpoints" tab in your Microsoft Foundry project.
"""

import json
import os
import time
from typing import Union
from pprint import pprint
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import PromptAgentDefinition, Tool, FunctionTool
from openai.types.responses.response_input_param import FunctionCallOutput, ResponseInputParam
from openai.types.evals.run_create_response import RunCreateResponse
from openai.types.evals.run_retrieve_response import RunRetrieveResponse

load_dotenv()

model_deployment_name = os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"]

# Define a function tool for the model to use
func_tool = FunctionTool(
    name="get_horoscope",
    parameters={
        "type": "object",
        "properties": {
            "sign": {
                "type": "string",
                "description": "An astrological sign like Taurus or Aquarius",
            },
        },
        "required": ["sign"],
        "additionalProperties": False,
    },
    description="Get today's horoscope for an astrological sign.",
    strict=True,
)

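# The JSON schema above is what the model sees when deciding to call the tool;
# get_horoscope below is the local implementation the sample runs when the model requests a call.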
tools: list[Tool] = [func_tool]


def get_horoscope(sign: str) -> str:
    """Generate a horoscope for the given astrological sign."""
    return f"{sign}: Next Tuesday you will befriend a baby otter."


project_client = AIProjectClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)

with project_client:

    openai_client = project_client.get_openai_client()

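    # Create a new version of the agent whose definition includes the function tool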
    agent = project_client.agents.create_version(
        agent_name="MyAgent",
        definition=PromptAgentDefinition(
            model=model_deployment_name,
            instructions="You are a helpful assistant that can use function tools.",
            tools=tools,
        ),
    )
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

    # Prompt the model with tools defined
    response = openai_client.responses.create(
        input="What is my horoscope? I am an Aquarius.",
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
    )
    print(f"Response output: {response.output_text}")

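    # Because the agent has the get_horoscope tool, the first response is expected to contain a
    # function_call item; collect the tool output here so it can be sent back to the model.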
    input_list: ResponseInputParam = []
    # Process function calls
    for item in response.output:
        if item.type == "function_call":
            if item.name == "get_horoscope":
                # Execute the function logic for get_horoscope
                horoscope = get_horoscope(**json.loads(item.arguments))

                # Provide function call results to the model
                input_list.append(
                    FunctionCallOutput(
                        type="function_call_output",
                        call_id=item.call_id,
                        output=json.dumps({"horoscope": horoscope}),
                    )
                )

    print("Final input:")
    print(input_list)

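    # Return the tool output to the model; previous_response_id chains this request to the
    # first response so the model can produce the final answer.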
    response = openai_client.responses.create(
        input=input_list,
        previous_response_id=response.id,
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
    )
    print(f"Response output: {response.output_text} (id: {response.id})")

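    # Configure the evaluation: the built-in tool_call_accuracy evaluator scores the agent's
    # tool calls and is initialized with the model deployment it should use.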
    data_source_config = {"type": "azure_ai_source", "scenario": "responses"}
    testing_criteria = [
        {
            "type": "azure_ai_evaluator",
            "name": "tool_call_accuracy",
            "evaluator_name": "builtin.tool_call_accuracy",
            "initialization_parameters": {"deployment_name": model_deployment_name},
        }
    ]
    eval_object = openai_client.evals.create(
        name="Agent Response Evaluation",
        data_source_config=data_source_config,  # type: ignore
        testing_criteria=testing_criteria,  # type: ignore
    )
    print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")

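    # The eval run pulls data straight from the stored response: response_retrieval looks up each
    # response by ID, with the IDs supplied inline through the file_content source.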
    data_source = {
        "type": "azure_ai_responses",
        "item_generation_params": {
            "type": "response_retrieval",
            "data_mapping": {"response_id": "{{item.resp_id}}"},
            "source": {"type": "file_content", "content": [{"item": {"resp_id": response.id}}]},
        },
    }

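    # Start an evaluation run for the eval definition created above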
    response_eval_run: Union[RunCreateResponse, RunRetrieveResponse] = openai_client.evals.runs.create(
        eval_id=eval_object.id, name=f"Evaluation Run for Agent {agent.name}", data_source=data_source  # type: ignore
    )
    print(f"Evaluation run created (id: {response_eval_run.id})")

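    # Poll every 5 seconds until the run reaches a terminal state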
    while response_eval_run.status not in ["completed", "failed"]:
        response_eval_run = openai_client.evals.runs.retrieve(run_id=response_eval_run.id, eval_id=eval_object.id)
        print(f"Waiting for eval run to complete... current status: {response_eval_run.status}")
        time.sleep(5)

    if response_eval_run.status == "completed":
        print("\n✓ Evaluation run completed successfully!")
        print(f"Result Counts: {response_eval_run.result_counts}")

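        # Each output item carries the evaluator results for one evaluated response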
        output_items = list(
            openai_client.evals.runs.output_items.list(run_id=response_eval_run.id, eval_id=eval_object.id)
        )
        print(f"\nOUTPUT ITEMS (Total: {len(output_items)})")
        print(f"Eval Run Report URL: {response_eval_run.report_url}")

        print("-" * 60)
        pprint(output_items)
        print("-" * 60)
    else:
        print(f"Eval Run Report URL: {response_eval_run.report_url}")
        print("\n✗ Evaluation run failed.")

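    # Clean up the evaluation and the agent created by this sample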
    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")