|
18 | 18 |
|
19 | 19 | from pydantic_evals.evaluators import EvaluationReason, EvaluatorContext
|
20 | 20 | from pydantic_evals.evaluators.common import (
|
21 |
| - DEFAULT_EVALUATORS, |
22 | 21 | Contains,
|
23 | 22 | Equals,
|
24 | 23 | EqualsExpected,
|
|
27 | 26 | LLMJudge,
|
28 | 27 | MaxDuration,
|
29 | 28 | OutputConfig,
|
30 |
| - Python, |
31 | 29 | )
|
32 | 30 | from pydantic_evals.otel._context_in_memory_span_exporter import context_subtree
|
33 | 31 | from pydantic_evals.otel._errors import SpanTreeRecordingError
|
@@ -395,68 +393,6 @@ async def test_llm_judge_evaluator_with_model_settings(mocker: MockerFixture):
|
395 | 393 | )
|
396 | 394 |
|
397 | 395 |
|
398 |
| -async def test_python(): |
399 |
| - """Test Python evaluator.""" |
400 |
| - evaluator = Python(expression='ctx.output > 0') |
401 |
| - |
402 |
| - # Test with valid expression |
403 |
| - assert evaluator.evaluate(MockContext(output=42)) is True |
404 |
| - assert evaluator.evaluate(MockContext(output=-1)) is False |
405 |
| - |
406 |
| - # Test with invalid expression |
407 |
| - evaluator_invalid = Python(expression='invalid syntax') |
408 |
| - with pytest.raises(SyntaxError): |
409 |
| - evaluator_invalid.evaluate(MockContext(output=42)) |
410 |
| - |
411 |
| - |
412 |
| -async def test_python_evaluator(): |
413 |
| - """Test Python evaluator.""" |
414 |
| - ctx = EvaluatorContext( |
415 |
| - name='test', |
416 |
| - inputs={'x': 42}, |
417 |
| - metadata=None, |
418 |
| - expected_output=None, |
419 |
| - output={'y': 84}, |
420 |
| - duration=0.0, |
421 |
| - _span_tree=SpanTreeRecordingError('did not record spans'), |
422 |
| - attributes={}, |
423 |
| - metrics={}, |
424 |
| - ) |
425 |
| - |
426 |
| - # Test simple expression |
427 |
| - evaluator = Python(expression='ctx.output["y"] == 84') |
428 |
| - assert evaluator.evaluate(ctx) is True |
429 |
| - |
430 |
| - # Test accessing inputs |
431 |
| - evaluator = Python(expression='ctx.inputs["x"] * 2 == ctx.output["y"]') |
432 |
| - assert evaluator.evaluate(ctx) is True |
433 |
| - |
434 |
| - # Test complex expression |
435 |
| - evaluator = Python(expression='all(k in ctx.output for k in ["y"])') |
436 |
| - assert evaluator.evaluate(ctx) is True |
437 |
| - |
438 |
| - # Test invalid expression |
439 |
| - evaluator = Python(expression='invalid syntax') |
440 |
| - with pytest.raises(SyntaxError): |
441 |
| - evaluator.evaluate(ctx) |
442 |
| - |
443 |
| - # Test expression with undefined variables |
444 |
| - evaluator = Python(expression='undefined_var') |
445 |
| - with pytest.raises(NameError): |
446 |
| - evaluator.evaluate(ctx) |
447 |
| - |
448 |
| - # Test expression with type error |
449 |
| - evaluator = Python(expression='ctx.output + 1') # Can't add dict and int |
450 |
| - with pytest.raises(TypeError): |
451 |
| - evaluator.evaluate(ctx) |
452 |
| - |
453 |
| - |
454 |
| -def test_default_evaluators(): |
455 |
| - """Test DEFAULT_EVALUATORS tuple.""" |
456 |
| - # Verify that Python evaluator is not included for security reasons |
457 |
| - assert Python not in DEFAULT_EVALUATORS |
458 |
| - |
459 |
| - |
460 | 396 | async def test_span_query_evaluator(capfire: CaptureLogfire):
|
461 | 397 | """Test HasMatchingSpan evaluator."""
|
462 | 398 | # Create a span tree with a known structure
|
|
0 commit comments