@@ -32,6 +32,14 @@ def f(a):
32
32
return a
33
33
34
34
index = MultiIndex .from_product (map (f , args ), names = names )
35
+ if isinstance (fill_value , dict ):
36
+ # fill_value is a dict mapping column names to fill values
37
+ # -> reindex column by column (reindex itself does not support this)
38
+ res = {}
39
+ for col in result .columns :
40
+ res [col ] = result [col ].reindex (index , fill_value = fill_value [col ])
41
+ return DataFrame (res , index = index ).sort_index ()
42
+
35
43
return result .reindex (index , fill_value = fill_value ).sort_index ()
36
44
37
45
@@ -340,18 +348,14 @@ def test_apply(ordered):
340
348
341
349
342
350
@pytest .mark .filterwarnings ("ignore:invalid value encountered in cast:RuntimeWarning" )
343
- def test_observed (request , using_infer_string , observed ):
351
+ def test_observed (observed , using_infer_string ):
344
352
# multiple groupers, don't re-expand the output space
345
353
# of the grouper
346
354
# gh-14942 (implement)
347
355
# gh-10132 (back-compat)
348
356
# gh-8138 (back-compat)
349
357
# gh-8869
350
358
351
- if using_infer_string and not observed :
352
- # TODO(infer_string) this fails with filling the string column with 0
353
- request .applymarker (pytest .mark .xfail (reason = "TODO(infer_string)" ))
354
-
355
359
cat1 = Categorical (["a" , "a" , "b" , "b" ], categories = ["a" , "b" , "z" ], ordered = True )
356
360
cat2 = Categorical (["c" , "d" , "c" , "d" ], categories = ["c" , "d" , "y" ], ordered = True )
357
361
df = DataFrame ({"A" : cat1 , "B" : cat2 , "values" : [1 , 2 , 3 , 4 ]})
@@ -379,11 +383,18 @@ def test_observed(request, using_infer_string, observed):
379
383
result = gb .sum ()
380
384
if not observed :
381
385
expected = cartesian_product_for_groupers (
382
- expected , [cat1 , cat2 ], list ("AB" ), fill_value = 0
386
+ expected ,
387
+ [cat1 , cat2 ],
388
+ list ("AB" ),
389
+ fill_value = {"values" : 0 , "C" : "" } if using_infer_string else 0 ,
383
390
)
384
391
385
392
tm .assert_frame_equal (result , expected )
386
393
394
+ result = gb ["C" ].sum ()
395
+ expected = expected ["C" ]
396
+ tm .assert_series_equal (result , expected )
397
+
387
398
# https://github.com/pandas-dev/pandas/issues/8138
388
399
d = {
389
400
"cat" : Categorical (
0 commit comments