From 6b7cba3af833518a1f9d32d6d68ded7a733a1e5f Mon Sep 17 00:00:00 2001 From: Ijtaba Hussain Date: Thu, 3 Jul 2025 19:56:28 +0100 Subject: [PATCH] Improve MultiIndex label rename checks --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 5 +++ pandas/core/generic.py | 42 ++++++++++++++++------- pandas/tests/frame/methods/test_rename.py | 24 +++++++++++++ 4 files changed, 60 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 94e375615d122..24ba7457f5a24 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -904,6 +904,7 @@ Other - Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`) - Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`) - Bug in :meth:`DataFrame.query` which raised an exception when querying integer column names using backticks. (:issue:`60494`) +- Bug in :meth:`DataFrame.rename` where, with ``errors="raise"``, the check for missing labels on a :class:`MultiIndex` was not consistent with the renaming actually applied (:issue:`55169`) - Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) - Bug in :meth:`DataFrame.sort_values` where sorting by a column explicitly named ``None`` raised a ``KeyError`` instead of sorting by the column as expected.
(:issue:`61512`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8053c17437c5e..a2ada2168c8b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5677,6 +5677,11 @@ def rename( level : int or level name, default None In case of a MultiIndex, only rename labels in the specified level. + + .. note:: + Labels are renamed individually, and not via tuples across + MultiIndex levels + errors : {'ignore', 'raise'}, default 'ignore' If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, or `columns` contains labels that are not present in the Index diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 66188d9e91232..2a06cd7f09b43 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1047,18 +1047,36 @@ def _rename( # GH 13473 if not callable(replacements): - if ax._is_multi and level is not None: - indexer = ax.get_level_values(level).get_indexer_for(replacements) - else: - indexer = ax.get_indexer_for(replacements) - - if errors == "raise" and len(indexer[indexer == -1]): - missing_labels = [ - label - for index, label in enumerate(replacements) - if indexer[index] == -1 - ] - raise KeyError(f"{missing_labels} not found in axis") + if errors == "raise": + missing_labels = [] + for replacement in replacements: + if ax._is_multi: + indexers = [ + ax.get_level_values(i).get_indexer_for([replacement]) + for i in range(ax.nlevels) + if i == level or level is None + ] + else: + indexers = [ax.get_indexer_for([replacement])] + + found_anywhere = any(any(indexer != -1) for indexer in indexers) + if not found_anywhere: + missing_labels.append(replacement) + + if len(missing_labels) > 0: + error = f"{missing_labels} not found in axis" + if ax._is_multi: + tuple_rename_tried = any( + type(label) is tuple and label in ax + for label in missing_labels + ) + if tuple_rename_tried: + error += ( + ". 
Please provide individual labels for " + "replacement, and not tuples across " + "MultiIndex levels" + ) + raise KeyError(error) new_index = ax._transform_index(f, level=level) result._set_axis_nocheck(new_index, axis=axis_no, inplace=True) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 6153a168476d4..e0cd18c1cb314 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -164,6 +164,16 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) + def test_rename_multiindex_with_checks(self): + df = DataFrame({("a", "count"): [1, 2], ("a", "sum"): [3, 4]}) + renamed = df.rename( + columns={"a": "b", "count": "number_of", "sum": "total"}, errors="raise" + ) + + new_columns = MultiIndex.from_tuples([("b", "number_of"), ("b", "total")]) + + tm.assert_index_equal(renamed.columns, new_columns) + def test_rename_nocopy(self, float_frame): renamed = float_frame.rename(columns={"C": "foo"}) @@ -221,6 +231,20 @@ def test_rename_errors_raises(self): with pytest.raises(KeyError, match="'E'] not found in axis"): df.rename(columns={"A": "a", "E": "e"}, errors="raise") + def test_rename_error_raised_for_label_across_multiindex_levels(self): + df = DataFrame([{"a": 1, "b": 2}, {"a": 3, "b": 4}]) + df = df.groupby("a").agg({"b": ("count", "sum")}) + with pytest.raises( + KeyError, + match=( + "\\[\\('b', 'count'\\)\\] not found " + "in axis\\. Please provide individual " + "labels for replacement, and not " + "tuples across MultiIndex levels" + ), + ): + df.rename(columns={("b", "count"): "new"}, errors="raise") + @pytest.mark.parametrize( "mapper, errors, expected_columns", [