Failed to render details panel #1567

@andrewginns

Description

Running the example MyEvaluator and logging to Logfire leads to a "Failed to render details panel" error in the Logfire UI.

[Screenshot: Logfire UI showing the "Failed to render details panel" error]

In my console I can see the expected output:

[Screenshot: the expected evaluation report table printed to the console]

Full Raw Data from Logfire

{
  "created_at": 1745330098358.305,
  "start_timestamp": "2025-04-22T13:54:56.700515Z",
  "end_timestamp": "2025-04-22T13:54:56.758743Z",
  "trace_id": "01965dc7ca3c485d375962d4b734f177",
  "span_id": "8c46ea45d9e9801e",
  "kind": "span",
  "level": 9,
  "parent_span_id": null,
  "span_name": "evaluate {name}",
  "message": "evaluate guess_city",
  "is_exception": false,
  "exception_type": null,
  "tags": [],
  "otel_scope_name": "pydantic-evals",
  "otel_scope_version": "3.14.0",
  "service_name": "evals",
  "service_version": null,
  "http_response_status_code": null,
  "gen_ai_operation_name": null,
  "gen_ai_request_model": null,
  "gen_ai_response_model": null,
  "gen_ai_system": null,
  "gen_ai_usage_input_tokens": 0,
  "gen_ai_usage_output_tokens": 0,
  "matched_filter": true,
  "is_extra_span": false,
  "day": "2025-04-22",
  "duration": 0.058228,
  "otel_status_code": "UNSET",
  "otel_status_message": null,
  "otel_links": [],
  "otel_events": [],
  "url_path": null,
  "url_query": null,
  "url_full": null,
  "http_route": null,
  "http_method": null,
  "attributes": {
    "averages": {
      "name": "Averages",
      "scores": {
        "MyEvaluator": 1
      },
      "labels": {},
      "metrics": {},
      "assertions": 1,
      "task_duration": 0.000249,
      "total_duration": 0.001984
    },
    "cases": [
      {
        "name": "simple_case",
        "inputs": "What is the capital of France?",
        "metadata": {
          "difficulty": "easy"
        },
        "expected_output": "Paris",
        "output": "Paris",
        "metrics": {},
        "attributes": {},
        "scores": {
          "MyEvaluator": {
            "name": "MyEvaluator",
            "value": 1,
            "reason": null,
            "source": {
              "name": "MyEvaluator",
              "arguments": null
            }
          }
        },
        "labels": {},
        "assertions": {
          "IsInstance": {
            "name": "IsInstance",
            "value": true,
            "reason": null,
            "source": {
              "name": "IsInstance",
              "arguments": [
                "str"
              ]
            }
          }
        },
        "task_duration": 0.000249,
        "total_duration": 0.001984,
        "trace_id": "01965dc7ca3c485d375962d4b734f177",
        "span_id": "6310642f2d6c41d0"
      }
    ],
    "code.filepath": "test.py",
    "code.lineno": 45,
    "logfire.msg_template": "evaluate {name}",
    "name": "guess_city"
  },
  "attributes_json_schema": "{\"type\":\"object\",\"properties\":{\"averages\":{\"type\":\"object\",\"title\":\"ReportCaseAggregate\",\"x-python-datatype\":\"PydanticModel\"},\"cases\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"title\":\"ReportCase\",\"x-python-datatype\":\"PydanticModel\",\"properties\":{\"scores\":{\"type\":\"object\",\"properties\":{\"MyEvaluator\":{\"type\":\"object\",\"title\":\"EvaluationResult\",\"x-python-datatype\":\"dataclass\",\"properties\":{\"source\":{\"type\":\"object\",\"title\":\"MyEvaluator\",\"x-python-datatype\":\"dataclass\"}}}}},\"assertions\":{\"type\":\"object\",\"properties\":{\"IsInstance\":{\"type\":\"object\",\"title\":\"EvaluationResult\",\"x-python-datatype\":\"dataclass\",\"properties\":{\"source\":{\"type\":\"object\",\"title\":\"IsInstance\",\"x-python-datatype\":\"dataclass\"}}}}}}}},\"name\":{}}}",
  "otel_scope_attributes": {},
  "service_namespace": "",
  "service_instance_id": "737e6cc97d7e4feeada8e3ac031618b1",
  "process_pid": 97902,
  "otel_resource_attributes": {
    "process.pid": 97902,
    "process.runtime.description": "3.13.0 (main, Oct 16 2024, 08:05:40) [Clang 18.1.8 ]",
    "process.runtime.name": "cpython",
    "process.runtime.version": "3.13.0",
    "service.instance.id": "737e6cc97d7e4feeada8e3ac031618b1",
    "service.name": "evals",
    "telemetry.sdk.language": "python",
    "telemetry.sdk.name": "opentelemetry",
    "telemetry.sdk.version": "1.32.1"
  },
  "telemetry_sdk_name": "opentelemetry",
  "telemetry_sdk_language": "python",
  "telemetry_sdk_version": "1.32.1",
  "deployment_environment": null
}
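
The underlying data is clearly intact; only the panel rendering fails. As a sanity check, the per-case results can be pulled straight out of the raw payload above with nothing but the standard library (a minimal sketch; the span.json filename is hypothetical):

import json

# Load the raw span payload shown above (filename is hypothetical).
with open("span.json") as f:
    span = json.load(f)

# Numeric evaluator results land under "scores"; boolean ones under "assertions".
for case in span["attributes"]["cases"]:
    print(f"case {case['name']}: output={case['output']!r}")
    for name, score in case["scores"].items():
        print(f"  score {name} = {score['value']}")
    for name, result in case["assertions"].items():
        print(f"  assertion {name} = {result['value']}")

For the payload above this prints the same values the console table shows: a MyEvaluator score of 1 and a passing IsInstance assertion for simple_case.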

Example Code

import logfire
from dotenv import load_dotenv
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import Evaluator, EvaluatorContext, IsInstance

load_dotenv()

# Configure logging to logfire if LOGFIRE_TOKEN is set in environment
logfire.configure(
    send_to_logfire="if-token-present",
    environment="development",
    service_name="evals",
)

case1 = Case(
    name="simple_case",
    inputs="What is the capital of France?",
    expected_output="Paris",
    metadata={"difficulty": "easy"},
)


class MyEvaluator(Evaluator[str, str]):
    def evaluate(self, ctx: EvaluatorContext[str, str]) -> float:
        if ctx.output == ctx.expected_output:
            return 1.0
        elif (
            isinstance(ctx.output, str)
            and ctx.expected_output.lower() in ctx.output.lower()
        ):
            return 0.8
        else:
            return 0.0


dataset = Dataset(
    cases=[case1],
    evaluators=[IsInstance(type_name="str"), MyEvaluator()],
)


async def guess_city(question: str) -> str:
    return "Paris"


report = dataset.evaluate_sync(guess_city)
report.print(include_input=True, include_output=True, include_durations=False)
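
Note that MyEvaluator's float return is recorded under "scores" in the span attributes, while IsInstance's boolean result lands under "assertions". To confirm the failure is isolated to the Logfire details panel rather than the evaluation itself, the same script can be run with export disabled (a sketch; only the configure call changes, the rest of the script stays as above):

# Same script, but with export to Logfire turned off; the console
# report still prints correctly, which points at the details-panel
# renderer rather than pydantic-evals itself.
logfire.configure(
    send_to_logfire=False,
    environment="development",
    service_name="evals",
)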

Python, Pydantic AI & LLM client version

"logfire>=3.14.0",
"pydantic-ai-slim[mcp]>=0.1.3",
"pydantic-evals[logfire]>=0.1.3",


Labels

bug (Something isn't working)
