Skip to content

Commit 9ff14a3

Browse files
authored
BUG: Avoid copying categorical codes if copy=False (#62000)
1 parent 8e44c0e commit 9ff14a3

File tree

3 files changed, with 14 additions and 3 deletions (+14 -3 lines changed)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -687,6 +687,7 @@ Bug fixes
687687
Categorical
688688
^^^^^^^^^^^
689689
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
690+
- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
690691
- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
691692
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
692693
-

pandas/core/arrays/categorical.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -575,7 +575,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
575575
# GH 10696/18593/18630
576576
dtype = self.dtype.update_dtype(dtype)
577577
self = self.copy() if copy else self
578-
result = self._set_dtype(dtype)
578+
result = self._set_dtype(dtype, copy=False)
579579

580580
elif isinstance(dtype, ExtensionDtype):
581581
return super().astype(dtype, copy=copy)
@@ -945,7 +945,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
945945

946946
super().__init__(self._ndarray, new_dtype)
947947

948-
def _set_dtype(self, dtype: CategoricalDtype) -> Self:
948+
def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
949949
"""
950950
Internal method for directly updating the CategoricalDtype
951951
@@ -958,7 +958,9 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
958958
We don't do any validation here. It's assumed that the dtype is
959959
a (valid) instance of `CategoricalDtype`.
960960
"""
961-
codes = recode_for_categories(self.codes, self.categories, dtype.categories)
961+
codes = recode_for_categories(
962+
self.codes, self.categories, dtype.categories, copy
963+
)
962964
return type(self)._simple_new(codes, dtype=dtype)
963965

964966
def set_ordered(self, value: bool) -> Self:

pandas/tests/arrays/categorical/test_astype.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,14 @@ def test_astype_category(self, dtype_ordered, ordered):
130130
expected = cat
131131
tm.assert_categorical_equal(result, expected)
132132

133+
def test_astype_category_copy_false_nocopy_codes(self):
134+
# GH#62000
135+
cat = Categorical([3, 2, 4, 1])
136+
new = cat.astype("category", copy=False)
137+
assert tm.shares_memory(new.codes, cat.codes)
138+
new = cat.astype("category", copy=True)
139+
assert not tm.shares_memory(new.codes, cat.codes)
140+
133141
def test_astype_object_datetime_categories(self):
134142
# GH#40754
135143
cat = Categorical(to_datetime(["2021-03-27", NaT]))

0 commit comments

Comments
 (0)