|
| 1 | +from datetime import datetime |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | +import pandas._testing as tm |
| 7 | + |
| 8 | + |
@pytest.mark.parametrize(
    ("expr", "expected_values", "expected_str"),
    [
        # Bare column reference.
        (pd.col("a"), [1, 2], "col('a')"),
        (pd.col("a") * 2, [2, 4], "(col('a') * 2)"),
        # A reduction: the expected column repeats the total on every row.
        (pd.col("a").sum(), [3, 3], "col('a').sum()"),
        # Arithmetic operators, each followed by its reflected form.
        (pd.col("a") + 1, [2, 3], "(col('a') + 1)"),
        (1 + pd.col("a"), [2, 3], "(1 + col('a'))"),
        (pd.col("a") - 1, [0, 1], "(col('a') - 1)"),
        (1 - pd.col("a"), [0, -1], "(1 - col('a'))"),
        (pd.col("a") * 1, [1, 2], "(col('a') * 1)"),
        (1 * pd.col("a"), [1, 2], "(1 * col('a'))"),
        (pd.col("a") / 1, [1.0, 2.0], "(col('a') / 1)"),
        (1 / pd.col("a"), [1.0, 0.5], "(1 / col('a'))"),
        (pd.col("a") // 1, [1, 2], "(col('a') // 1)"),
        (1 // pd.col("a"), [1, 0], "(1 // col('a'))"),
        (pd.col("a") % 1, [0, 0], "(col('a') % 1)"),
        (1 % pd.col("a"), [0, 1], "(1 % col('a'))"),
        # Comparison operators.
        (pd.col("a") > 1, [False, True], "(col('a') > 1)"),
        (pd.col("a") >= 1, [True, True], "(col('a') >= 1)"),
        (pd.col("a") < 1, [False, False], "(col('a') < 1)"),
        (pd.col("a") <= 1, [True, False], "(col('a') <= 1)"),
        (pd.col("a") == 1, [True, False], "(col('a') == 1)"),
    ],
)
def test_col_simple(
    expr: pd.Expr, expected_values: list[object], expected_str: str
) -> None:
    """Check a column expression's evaluated values and its string form.

    The expression is passed to ``DataFrame.assign`` as new column ``c`` on a
    two-row frame with columns ``a`` and ``b``.  The resulting frame must
    equal the input frame plus ``c == expected_values``, and ``str(expr)``
    must equal ``expected_str``.
    """
    base_columns = {"a": [1, 2], "b": [3, 4]}
    frame = pd.DataFrame(base_columns)
    expected = pd.DataFrame({**base_columns, "c": expected_values})

    result = frame.assign(c=expr)

    tm.assert_frame_equal(result, expected)
    assert str(expr) == expected_str
| 42 | + |
| 43 | + |
@pytest.mark.parametrize(
    ("expr", "expected_values", "expected_str"),
    [
        # ``.dt`` accessor: attribute access and a method call with an argument.
        (pd.col("a").dt.year, [2020], "col('a').dt.year"),
        (pd.col("a").dt.strftime("%B"), ["January"], "col('a').dt.strftime('%B')"),
        # ``.str`` accessor.
        (pd.col("b").str.upper(), ["FOO"], "col('b').str.upper()"),
    ],
)
def test_namespaces(
    expr: pd.Expr, expected_values: list[object], expected_str: str
) -> None:
    """Check expressions built through the ``.dt`` and ``.str`` accessors.

    The expression is assigned as column ``c`` on a one-row frame holding a
    datetime column ``a`` and a string column ``b``.  Both the resulting frame
    and ``str(expr)`` are compared against the expected values.
    """
    timestamp = datetime(2020, 1, 1)
    base_columns = {"a": [timestamp], "b": ["foo"]}
    expected = pd.DataFrame({**base_columns, "c": expected_values})

    result = pd.DataFrame(base_columns).assign(c=expr)

    # Column dtypes are deliberately not compared here.
    # NOTE(review): presumably because accessor results need not share the
    # dtype inferred from the literal expected list -- confirm.
    tm.assert_frame_equal(result, expected, check_dtype=False)
    assert str(expr) == expected_str
| 62 | + |
| 63 | + |
def test_invalid() -> None:
    """Using an expression on a column the frame lacks raises ``ValueError``.

    The frame only has column ``a``; assigning from ``pd.col("b").mean()``
    must raise a ``ValueError`` whose message matches ``"did you mean"``.
    """
    frame = pd.DataFrame({"a": [1, 2]})
    # The expression is built inside the context manager so that the same
    # calls are covered by ``pytest.raises`` as before.
    with pytest.raises(ValueError, match="did you mean"):
        frame.assign(c=pd.col("b").mean())
0 commit comments