From f87de21deb82227c228f19c2783945e0522f73a3 Mon Sep 17 00:00:00 2001 From: skonda29 Date: Wed, 30 Jul 2025 08:18:47 -0700 Subject: [PATCH 1/4] BUG: Fix _validate_key to handle ExtensionArray correctly GH#61311 --- pandas/core/indexing.py | 13 ++++++++++--- pandas/tests/indexing/test_iloc.py | 11 +++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 34a437ba40bd8..18d6821883be7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1609,9 +1609,16 @@ def _validate_key(self, key, axis: AxisInt) -> None: if not is_numeric_dtype(arr.dtype): raise IndexError(f".iloc requires numeric indexers, got {arr}") - # check that the key does not exceed the maximum size of the index - if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis): - raise IndexError("positional indexers are out-of-bounds") + if len(arr): + # convert to numpy array for min/max with ExtensionArrays + if hasattr(arr, "to_numpy"): + np_arr = arr.to_numpy() + else: + np_arr = np.asarray(arr) + + # check that the key does not exceed the maximum size + if np.max(np_arr) >= len_axis or np.min(np_arr) < -len_axis: + raise IndexError("positional indexers are out-of-bounds") else: raise ValueError(f"Can only index by location with a [{self._valid_types}]") diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 3be69617cad43..8b8d37c341473 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1478,3 +1478,14 @@ def test_iloc_nullable_int64_size_1_nan(self): result = DataFrame({"a": ["test"], "b": [np.nan]}) with pytest.raises(TypeError, match="Invalid value"): result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") + + def test_iloc_arrow_extension_array(self): + # GH#61311 + pytest.importorskip("pyarrow") + + df = DataFrame({"a": [1, 2], "c": [0, 2], "d": ["c", "a"]}) + + df_arrow = df.convert_dtypes(dtype_backend="pyarrow") + result = df_arrow.iloc[:, df_arrow["c"]] + expected = df_arrow.iloc[:, [0, 2]] + tm.assert_frame_equal(result, expected) From d3039ad9d04a2f2cf81445d78279d7783639b764 Mon Sep 17 00:00:00 2001 From: skonda29 Date: Fri, 1 Aug 2025 09:43:21 -0700 Subject: [PATCH 2/4] use _reduce method for ExtensionArrays --- pandas/core/indexing.py | 13 +++++++------ pandas/tests/indexing/test_iloc.py | 10 +++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 18d6821883be7..c5abdaa7862be 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1610,14 +1610,15 @@ def _validate_key(self, key, axis: AxisInt) -> None: raise IndexError(f".iloc requires numeric indexers, got {arr}") if len(arr): - # convert to numpy array for min/max with ExtensionArrays - if hasattr(arr, "to_numpy"): - np_arr = arr.to_numpy() + # handle ExtensionArray safely using _reduce method else use numpy + if isinstance(arr.dtype, ExtensionDtype): + arr_max = arr._reduce("max") + arr_min = arr._reduce("min") else: - np_arr = np.asarray(arr) + arr_max = np.max(arr) + arr_min = np.min(arr) - # check that the key does not exceed the maximum size - if np.max(np_arr) >= len_axis or np.min(np_arr) < -len_axis: + if arr_max >= len_axis or arr_min < -len_axis: raise IndexError("positional indexers are out-of-bounds") else: raise ValueError(f"Can only index by location with a [{self._valid_types}]") diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 8b8d37c341473..71e7c581c2d17 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1482,10 +1482,10 @@ def test_iloc_nullable_int64_size_1_nan(self): def test_iloc_arrow_extension_array(self): # GH#61311 pytest.importorskip("pyarrow") - df = DataFrame({"a": [1, 2], "c": [0, 2], "d": ["c", "a"]}) - - df_arrow = df.convert_dtypes(dtype_backend="pyarrow") + df_arrow = DataFrame( + {"a": [1, 2], "c": [0, 2], "d": ["c", "a"]} + ).convert_dtypes(dtype_backend="pyarrow") + expected = df.iloc[:, df["c"]] result = df_arrow.iloc[:, df_arrow["c"]] - expected = df_arrow.iloc[:, [0, 2]] - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_dtype=False) From a9c49ea2d1f49b3d022b45001e4878ac4ff057ab Mon Sep 17 00:00:00 2001 From: skonda29 Date: Fri, 1 Aug 2025 09:53:49 -0700 Subject: [PATCH 3/4] update comments --- pandas/core/indexing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c5abdaa7862be..8918f488e4e04 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1610,7 +1610,7 @@ def _validate_key(self, key, axis: AxisInt) -> None: raise IndexError(f".iloc requires numeric indexers, got {arr}") if len(arr): - # handle ExtensionArray safely using _reduce method else use numpy + # handle ExtensionArray using _reduce method else use numpy if isinstance(arr.dtype, ExtensionDtype): arr_max = arr._reduce("max") arr_min = arr._reduce("min") @@ -1618,6 +1618,7 @@ def _validate_key(self, key, axis: AxisInt) -> None: arr_max = np.max(arr) arr_min = np.min(arr) + # check that the key does not exceed the maximum size if arr_max >= len_axis or arr_min < -len_axis: raise IndexError("positional indexers are out-of-bounds") else: From c204a9e40878cfa5527bda8470887f7e6af7bc79 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:21:32 -0700 Subject: [PATCH 4/4] Update pandas/core/indexing.py --- pandas/core/indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8918f488e4e04..12b2590e678d1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1610,7 +1610,6 @@ def _validate_key(self, key, axis: AxisInt) -> None: raise IndexError(f".iloc requires numeric indexers, got {arr}") if len(arr): - # handle ExtensionArray using _reduce method else use numpy if isinstance(arr.dtype, ExtensionDtype): arr_max = arr._reduce("max") arr_min = arr._reduce("min")