Skip to content

Commit bc500f7

Browse files
BUG: adding row to DataFrame with EADtype index loses dtype (#62345)
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 324b986 commit bc500f7

File tree

13 files changed

+92
-19
lines changed

13 files changed

+92
-19
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,7 @@ Other API changes
603603
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
604604
the dtype of the resulting Index (:issue:`60797`)
605605
- :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies aligns (:issue:`55782`)
606+
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
606607
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
607608
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
608609
- Numpy functions like ``np.isinf`` that return a bool dtype when called on an :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
@@ -974,8 +975,8 @@ Indexing
974975
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
975976
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
976977
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
978+
- Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :meth:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
977979
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
978-
-
979980

980981
Missing
981982
^^^^^^^
@@ -1094,7 +1095,7 @@ Reshaping
10941095
- Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
10951096
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
10961097
- Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
1097-
-
1098+
- Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)
10981099

10991100
Sparse
11001101
^^^^^^

pandas/core/frame.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10918,6 +10918,13 @@ def _append_internal(
1091810918
),
1091910919
)
1092010920
row_df = other.to_frame().T
10921+
if isinstance(self.index.dtype, ExtensionDtype):
10922+
# GH#41626 retain e.g. CategoricalDtype if reached via
10923+
# df.loc[key] = item
10924+
row_df.index = self.index.array._cast_pointwise_result(
10925+
row_df.index._values
10926+
)
10927+
1092110928
# infer_objects is needed for
1092210929
# test_append_empty_frame_to_series_with_dateutil_tz
1092310930
other = row_df.infer_objects().rename_axis(index.names)

pandas/core/indexes/category.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
)
1919

2020
from pandas.core.dtypes.common import is_scalar
21-
from pandas.core.dtypes.concat import concat_compat
2221
from pandas.core.dtypes.dtypes import CategoricalDtype
2322
from pandas.core.dtypes.missing import (
2423
is_valid_na_for_dtype,
@@ -519,17 +518,3 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
519518
"""
520519
mapped = self._values.map(mapper, na_action=na_action)
521520
return Index(mapped, name=self.name)
522-
523-
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
524-
# if calling index is category, don't check dtype of others
525-
try:
526-
cat = Categorical._concat_same_type(
527-
[self._is_dtype_compat(c) for c in to_concat]
528-
)
529-
except TypeError:
530-
# not all to_concat elements are among our categories (or NA)
531-
532-
res = concat_compat([x._values for x in to_concat])
533-
return Index(res, name=name)
534-
else:
535-
return type(self)._simple_new(cat, name=name)

pandas/core/reshape/merge.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1483,7 +1483,11 @@ def _create_join_index(
14831483
mask = indexer == -1
14841484
if np.any(mask):
14851485
fill_value = na_value_for_dtype(index.dtype, compat=False)
1486-
index = index.append(Index([fill_value]))
1486+
if not index._can_hold_na:
1487+
new_index = Index([fill_value])
1488+
else:
1489+
new_index = Index([fill_value], dtype=index.dtype)
1490+
index = index.append(new_index)
14871491
if indexer is None:
14881492
return index.copy()
14891493
return index.take(indexer)

pandas/tests/extension/base/setitem.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.core.dtypes.common import is_hashable
5+
46
import pandas as pd
57
import pandas._testing as tm
68

@@ -310,6 +312,22 @@ def test_setitem_expand_with_extension(self, data):
310312
result.loc[:, "B"] = data
311313
tm.assert_frame_equal(result, expected)
312314

315+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
316+
# GH#41626 retain index.dtype in setitem-with-expansion
317+
if not is_hashable(data[0]):
318+
pytest.skip("Test does not apply to non-hashable data.")
319+
data = data.unique()
320+
expected = pd.DataFrame({"A": range(len(data))}, index=data)
321+
df = expected.iloc[:-1]
322+
ser = df["A"]
323+
item = data[-1]
324+
325+
df.loc[item] = len(data) - 1
326+
tm.assert_frame_equal(df, expected)
327+
328+
ser.loc[item] = len(data) - 1
329+
tm.assert_series_equal(ser, expected["A"])
330+
313331
def test_setitem_frame_invalid_length(self, data):
314332
df = pd.DataFrame({"A": [1] * len(data)})
315333
xpr = (

pandas/tests/extension/test_arrow.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,15 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op):
10671067
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
10681068
tm.assert_series_equal(result, expected)
10691069

1070+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
1071+
pa_dtype = data.dtype.pyarrow_dtype
1072+
if pa.types.is_date(pa_dtype):
1073+
mark = pytest.mark.xfail(
1074+
reason="GH#62343 incorrectly casts to timestamp[ms][pyarrow]"
1075+
)
1076+
request.applymarker(mark)
1077+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
1078+
10701079

10711080
class TestLogicalOps:
10721081
"""Various Series and DataFrame logical ops methods."""

pandas/tests/extension/test_interval.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,13 @@ def test_EA_types(self, engine, data, request):
126126
def test_astype_str(self, data):
127127
super().test_astype_str(data)
128128

129+
@pytest.mark.xfail(
130+
reason="Test is invalid for IntervalDtype, needs to be adapted for "
131+
"this dtype with an index with index._index_as_unique."
132+
)
133+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
134+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
135+
129136

130137
# TODO: either belongs in tests.arrays.interval or move into base tests.
131138
def test_fillna_non_scalar_raises(data_missing):

pandas/tests/extension/test_masked.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,3 +360,9 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
360360
)
361361
)
362362
tm.assert_series_equal(result, expected)
363+
364+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
365+
if data.dtype.kind == "b":
366+
mark = pytest.mark.xfail(reason="GH#62344 incorrectly casts to object")
367+
request.applymarker(mark)
368+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)

pandas/tests/extension/test_numpy.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,12 @@ def test_index_from_listlike_with_dtype(self, data):
421421
def test_EA_types(self, engine, data, request):
422422
super().test_EA_types(engine, data, request)
423423

424+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
425+
if isinstance(data[-1], tuple):
426+
mark = pytest.mark.xfail(reason="Unpacks tuple")
427+
request.applymarker(mark)
428+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
429+
424430

425431
class Test2DCompat(base.NDArrayBacked2DTests):
426432
pass

pandas/tests/extension/test_string.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,14 @@ def test_arith_series_with_array(
257257
request.applymarker(mark)
258258
super().test_arith_series_with_array(data, all_arithmetic_operators)
259259

260+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(
261+
self, data, request, using_infer_string
262+
):
263+
if not using_infer_string and data.dtype.storage == "python":
264+
mark = pytest.mark.xfail(reason="Casts to object")
265+
request.applymarker(mark)
266+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
267+
260268

261269
class Test2DCompat(base.Dim2CompatTests):
262270
@pytest.fixture(autouse=True)

0 commit comments

Comments
 (0)