Closed
Labels: bug (Something isn't working)
Description
Initial Checks
- I confirm that I'm using the latest version of Pydantic AI
- I confirm that I searched for my issue in https://github.com/pydantic/pydantic-ai/issues before opening this issue
Description
Running the MyEvaluator example and logging to Logfire leads to an error in the Logfire details panel.
- Example from https://ai.pydantic.dev/evals/#evaluators:~:text=run%20%22as%20is%22)-,Evaluation%20Process,-The%20evaluation%20process

The error shown is:
Failed to render details panel. Most likely, the record attributes do not match the expected panel data type.

In my console, I can see the expected output.

Full Raw Data from Logfire
{
  "created_at": 1745330098358.305,
  "start_timestamp": "2025-04-22T13:54:56.700515Z",
  "end_timestamp": "2025-04-22T13:54:56.758743Z",
  "trace_id": "01965dc7ca3c485d375962d4b734f177",
  "span_id": "8c46ea45d9e9801e",
  "kind": "span",
  "level": 9,
  "parent_span_id": null,
  "span_name": "evaluate {name}",
  "message": "evaluate guess_city",
  "is_exception": false,
  "exception_type": null,
  "tags": [],
  "otel_scope_name": "pydantic-evals",
  "otel_scope_version": "3.14.0",
  "service_name": "evals",
  "service_version": null,
  "http_response_status_code": null,
  "gen_ai_operation_name": null,
  "gen_ai_request_model": null,
  "gen_ai_response_model": null,
  "gen_ai_system": null,
  "gen_ai_usage_input_tokens": 0,
  "gen_ai_usage_output_tokens": 0,
  "matched_filter": true,
  "is_extra_span": false,
  "day": "2025-04-22",
  "duration": 0.058228,
  "otel_status_code": "UNSET",
  "otel_status_message": null,
  "otel_links": [],
  "otel_events": [],
  "url_path": null,
  "url_query": null,
  "url_full": null,
  "http_route": null,
  "http_method": null,
  "attributes": {
    "averages": {
      "name": "Averages",
      "scores": {
        "MyEvaluator": 1
      },
      "labels": {},
      "metrics": {},
      "assertions": 1,
      "task_duration": 0.000249,
      "total_duration": 0.001984
    },
    "cases": [
      {
        "name": "simple_case",
        "inputs": "What is the capital of France?",
        "metadata": {
          "difficulty": "easy"
        },
        "expected_output": "Paris",
        "output": "Paris",
        "metrics": {},
        "attributes": {},
        "scores": {
          "MyEvaluator": {
            "name": "MyEvaluator",
            "value": 1,
            "reason": null,
            "source": {
              "name": "MyEvaluator",
              "arguments": null
            }
          }
        },
        "labels": {},
        "assertions": {
          "IsInstance": {
            "name": "IsInstance",
            "value": true,
            "reason": null,
            "source": {
              "name": "IsInstance",
              "arguments": [
                "str"
              ]
            }
          }
        },
        "task_duration": 0.000249,
        "total_duration": 0.001984,
        "trace_id": "01965dc7ca3c485d375962d4b734f177",
        "span_id": "6310642f2d6c41d0"
      }
    ],
    "code.filepath": "test.py",
    "code.lineno": 45,
    "logfire.msg_template": "evaluate {name}",
    "name": "guess_city"
  },
  "attributes_json_schema": "{\"type\":\"object\",\"properties\":{\"averages\":{\"type\":\"object\",\"title\":\"ReportCaseAggregate\",\"x-python-datatype\":\"PydanticModel\"},\"cases\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"title\":\"ReportCase\",\"x-python-datatype\":\"PydanticModel\",\"properties\":{\"scores\":{\"type\":\"object\",\"properties\":{\"MyEvaluator\":{\"type\":\"object\",\"title\":\"EvaluationResult\",\"x-python-datatype\":\"dataclass\",\"properties\":{\"source\":{\"type\":\"object\",\"title\":\"MyEvaluator\",\"x-python-datatype\":\"dataclass\"}}}}},\"assertions\":{\"type\":\"object\",\"properties\":{\"IsInstance\":{\"type\":\"object\",\"title\":\"EvaluationResult\",\"x-python-datatype\":\"dataclass\",\"properties\":{\"source\":{\"type\":\"object\",\"title\":\"IsInstance\",\"x-python-datatype\":\"dataclass\"}}}}}}}},\"name\":{}}}",
  "otel_scope_attributes": {},
  "service_namespace": "",
  "service_instance_id": "737e6cc97d7e4feeada8e3ac031618b1",
  "process_pid": 97902,
  "otel_resource_attributes": {
    "process.pid": 97902,
    "process.runtime.description": "3.13.0 (main, Oct 16 2024, 08:05:40) [Clang 18.1.8 ]",
    "process.runtime.name": "cpython",
    "process.runtime.version": "3.13.0",
    "service.instance.id": "737e6cc97d7e4feeada8e3ac031618b1",
    "service.name": "evals",
    "telemetry.sdk.language": "python",
    "telemetry.sdk.name": "opentelemetry",
    "telemetry.sdk.version": "1.32.1"
  },
  "telemetry_sdk_name": "opentelemetry",
  "telemetry_sdk_language": "python",
  "telemetry_sdk_version": "1.32.1",
  "deployment_environment": null
}
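
For readability, here is a short sketch for unescaping the attributes_json_schema field embedded in the raw data above (span.json is a hypothetical filename standing in for the raw span data saved locally):

import json

# Load the raw span data shown above (assumed to be saved as span.json).
with open("span.json") as f:
    raw_span = json.load(f)

# attributes_json_schema is stored as an escaped JSON string; decode it and pretty-print.
schema = json.loads(raw_span["attributes_json_schema"])
print(json.dumps(schema, indent=2))

The decoded schema shows the ReportCaseAggregate / ReportCase models serialized with x-python-datatype markers (PydanticModel and dataclass).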
Example Code
import logfire
from dotenv import load_dotenv
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import Evaluator, EvaluatorContext, IsInstance

load_dotenv()

# Configure logging to logfire if LOGFIRE_TOKEN is set in environment
logfire.configure(
    send_to_logfire="if-token-present",
    environment="development",
    service_name="evals",
)

case1 = Case(
    name="simple_case",
    inputs="What is the capital of France?",
    expected_output="Paris",
    metadata={"difficulty": "easy"},
)


class MyEvaluator(Evaluator[str, str]):
    def evaluate(self, ctx: EvaluatorContext[str, str]) -> float:
        if ctx.output == ctx.expected_output:
            return 1.0
        elif (
            isinstance(ctx.output, str)
            and ctx.expected_output.lower() in ctx.output.lower()
        ):
            return 0.8
        else:
            return 0.0


dataset = Dataset(
    cases=[case1],
    evaluators=[IsInstance(type_name="str"), MyEvaluator()],
)


async def guess_city(question: str) -> str:
    return "Paris"


report = dataset.evaluate_sync(guess_city)
report.print(include_input=True, include_output=True, include_durations=False)
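
For reference, the per-case fields that end up in the span attributes can also be read directly from the report object; this is a rough sketch assuming the ReportCase field names seen in the raw data above (they may differ between pydantic-evals versions):

# Inspect the report programmatically instead of via report.print().
for case in report.cases:
    print(case.name, case.output, case.scores, case.assertions)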
Python, Pydantic AI & LLM client version
"logfire>=3.14.0",
"pydantic-ai-slim[mcp]>=0.1.3",
"pydantic-evals[logfire]>=0.1.3",