17
17
from pandas import DataFrame
18
18
19
19
20
# Maps the dunder name of a binary operator to the symbol used when building
# an expression's repr string. A name that is missing from this table is
# rendered verbatim by the ``OP_SYMBOLS.get(op, op)`` lookup, so every
# operator should be listed together with its reflected (``__r*__``) variant.
OP_SYMBOLS = {
    # Arithmetic
    "__add__": "+",
    "__radd__": "+",
    "__sub__": "-",
    "__rsub__": "-",
    "__mul__": "*",
    "__rmul__": "*",
    "__truediv__": "/",
    "__rtruediv__": "/",
    "__floordiv__": "//",
    "__rfloordiv__": "//",
    "__mod__": "%",
    "__rmod__": "%",
    "__pow__": "**",
    "__rpow__": "**",
    # Comparison
    "__ge__": ">=",
    "__gt__": ">",
    "__le__": "<=",
    "__lt__": "<",
    "__eq__": "==",
    "__ne__": "!=",
    # Logical / bitwise
    "__and__": "&",
    "__rand__": "&",
    "__or__": "|",
    "__ror__": "|",
    "__xor__": "^",
    "__rxor__": "^",
}
39
+
40
+
20
41
def parse_args(df: DataFrame, *args: Any) -> tuple[Any, ...]:
    """
    Resolve positional arguments against ``df``.

    Every ``Expr`` in ``args`` is replaced by the result of calling its
    ``_func`` on ``df``; any other value is passed through unchanged.

    Parameters
    ----------
    df : DataFrame
        Object handed to each expression's ``_func``.
    *args
        Arguments to resolve.

    Returns
    -------
    tuple
        The resolved arguments, in the same order and of the same length
        as ``args``.
    """
    # A generator avoids building an intermediate list just to copy it
    # into the tuple.
    return tuple(x._func(df) if isinstance(x, Expr) else x for x in args)
22
43
@@ -29,8 +50,11 @@ def parse_kwargs(df: DataFrame, **kwargs: Any) -> dict[Hashable, Series]:
29
50
30
51
31
52
class Expr :
32
- def __init__ (self , func : Callable [[DataFrame ], Any ]) -> None :
53
+ def __init__ (
54
+ self , func : Callable [[DataFrame ], Any ], repr_str : str | None = None
55
+ ) -> None :
33
56
self ._func = func
57
+ self ._repr_str = repr_str
34
58
35
59
def __call__ (self , df : DataFrame ) -> Series :
36
60
result = self ._func (df )
@@ -43,9 +67,22 @@ def __call__(self, df: DataFrame) -> Series:
43
67
return result
44
68
45
69
def _with_binary_op(self, op: str, other: Any) -> Expr:
    """
    Build the expression that applies binary operator ``op`` to ``other``.

    Parameters
    ----------
    op : str
        Dunder name of the operator method (e.g. ``"__add__"``); it is
        looked up with ``getattr`` on the evaluated left-hand result.
    other : Any
        Right-hand operand. An ``Expr`` is evaluated against the same
        DataFrame first; any other value is passed to the operator as is.

    Returns
    -------
    Expr
        New expression whose repr is ``(<left> <symbol> <right>)``.
    """
    op_symbol = OP_SYMBOLS.get(op, op)

    if isinstance(other, Expr):

        def evaluate(df: DataFrame) -> Any:
            return getattr(self._func(df), op)(other._func(df))

    else:

        def evaluate(df: DataFrame) -> Any:
            return getattr(self._func(df), op)(other)

    # Use repr() rather than reading ``_repr_str`` directly: an Expr that
    # was created without a repr string then shows its ``__repr__``
    # placeholder instead of the text "None". For non-Expr operands this is
    # the same ``!r`` formatting as before.
    left, right = repr(self), repr(other)
    # Reflected operators (``__radd__`` etc.) display the operands swapped.
    # NOTE(review): the prefix test would also match a non-reflected name
    # such as ``__rshift__`` -- confirm no such operator is routed here.
    if op.startswith("__r"):
        left, right = right, left
    return Expr(evaluate, f"({left} {op_symbol} {right})")
49
86
50
87
# Binary ops
51
88
def __add__ (self , other : Any ) -> Expr :
@@ -106,7 +143,31 @@ def func(df: DataFrame, *args: Any, **kwargs: Any) -> Any:
106
143
parsed_kwargs = parse_kwargs (df , ** kwargs )
107
144
return getattr (self (df ), attr )(* parsed_args , ** parsed_kwargs )
108
145
109
- return lambda * args , ** kwargs : Expr (lambda df : func (df , * args , ** kwargs ))
146
def wrapper(*args: Any, **kwargs: Any) -> Expr:
    # Build a readable representation of the method call. Each argument is
    # rendered with repr(): for an Expr that yields its expression text
    # without surrounding quotes, so a nested expression is no longer
    # displayed as if it were a string literal.
    rendered = [repr(arg) for arg in args]
    rendered.extend(f"{k}={v!r}" for k, v in kwargs.items())

    # ``self!r`` (instead of ``self._repr_str``) keeps the output sensible
    # when this expression was created without a repr string.
    repr_str = f"{self!r}.{attr}({', '.join(rendered)})"

    return Expr(lambda df: func(df, *args, **kwargs), repr_str)
166
+
167
+ return wrapper
168
+
169
+ def __repr__ (self ) -> str :
170
+ return self ._repr_str or "Expr(...)"
110
171
111
172
# Namespaces
112
173
@property
@@ -135,14 +196,16 @@ def struct(self) -> NamespaceExpr:
135
196
136
197
137
198
class NamespaceExpr :
138
- def __init__ (self , func : Callable [[ DataFrame ], Any ] , namespace : str ) -> None :
199
+ def __init__ (self , func : Expr , namespace : str ) -> None :
139
200
self ._func = func
140
201
self ._namespace = namespace
141
202
142
203
def __getattr__ (self , attr : str ) -> Any :
143
204
if isinstance (getattr (getattr (Series , self ._namespace ), attr ), property ):
205
+ repr_str = f"{ self ._func ._repr_str } .{ self ._namespace } .{ attr } "
144
206
return Expr (
145
- lambda df : getattr (getattr (self ._func (df ), self ._namespace ), attr )
207
+ lambda df : getattr (getattr (self ._func (df ), self ._namespace ), attr ),
208
+ repr_str ,
146
209
)
147
210
148
211
def func (df : DataFrame , * args : Any , ** kwargs : Any ) -> Any :
@@ -152,11 +215,32 @@ def func(df: DataFrame, *args: Any, **kwargs: Any) -> Any:
152
215
* parsed_args , ** parsed_kwargs
153
216
)
154
217
155
- return lambda * args , ** kwargs : Expr (lambda df : func (df , * args , ** kwargs ))
218
def wrapper(*args: Any, **kwargs: Any) -> Expr:
    # Build a readable representation of the namespace method call. Each
    # argument is rendered with repr(): for an Expr that yields its
    # expression text without surrounding quotes, so a nested expression
    # is no longer displayed as if it were a string literal.
    rendered = [repr(arg) for arg in args]
    rendered.extend(f"{k}={v!r}" for k, v in kwargs.items())

    # ``self._func!r`` (instead of ``self._func._repr_str``) keeps the
    # output sensible when the wrapped expression has no repr string.
    call = f"{self._namespace}.{attr}({', '.join(rendered)})"
    repr_str = f"{self._func!r}.{call}"

    return Expr(lambda df: func(df, *args, **kwargs), repr_str)
238
+
239
+ return wrapper
156
240
157
241
158
242
def col(col_name: Hashable) -> Expr:
    """
    Create an expression that selects column ``col_name`` from a DataFrame.

    Parameters
    ----------
    col_name : Hashable
        Label used to index the DataFrame (``df[col_name]``) when the
        expression is evaluated.

    Returns
    -------
    Expr
        Expression whose repr string is ``col(<col_name repr>)``.

    Raises
    ------
    TypeError
        If ``col_name`` is not hashable.
    """
    if isinstance(col_name, Hashable):
        return Expr(lambda df: df[col_name], f"col({col_name!r})")
    raise TypeError(f"Expected Hashable, got: {type(col_name)}")
0 commit comments