diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 25fbfd318a2..dda32008238 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -247,7 +247,6 @@ def mark_exposed(self) -> None: This also unspills the buffer (unspillable buffers cannot be spilled!). """ - self._manager.spill_to_device_limit() with self.lock: if not self.exposed: diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index c0f42db030e..370e128d41f 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -24,6 +24,7 @@ import rmm import cudf +from cudf.api.extensions import no_default from cudf.api.types import ( _is_categorical_dtype, infer_dtype, @@ -612,7 +613,9 @@ def from_pylibcudf( ) @classmethod - def from_cuda_array_interface(cls, arbitrary: Any) -> Self: + def from_cuda_array_interface( + cls, arbitrary: Any, data_ptr_exposed=no_default + ) -> Self: """ Create a Column from an object implementing the CUDA array interface. 
@@ -647,9 +650,11 @@ def from_cuda_array_interface(cls, arbitrary: Any) -> Self: else: mask = None + if data_ptr_exposed is no_default: + data_ptr_exposed = cudf.get_option("copy_on_write") column = ColumnBase.from_pylibcudf( plc.Column.from_cuda_array_interface(arbitrary), - data_ptr_exposed=cudf.get_option("copy_on_write"), + data_ptr_exposed=data_ptr_exposed, ) if mask is not None: column = column.set_mask(mask) @@ -2886,6 +2891,16 @@ def as_column( arbitrary = np.asarray(arbitrary) else: arbitrary = cp.asarray(arbitrary) + # Explicitly pass `data_ptr_exposed=False` so that the + # existing memory created by cupy is reused here. + column = ColumnBase.from_cuda_array_interface( + arbitrary, data_ptr_exposed=False + ) + if nan_as_null is not False: + column = column.nans_to_nulls() + if dtype is not None: + column = column.astype(dtype) + return column return as_column( arbitrary, nan_as_null=nan_as_null, dtype=dtype, length=length ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 948b71ce296..27ad1b5d1d2 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -574,7 +574,7 @@ def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: # If the dtype is a pandas nullable extension type, we need to # float column doesn't have any NaNs. 
res = self.nans_to_nulls() - res._dtype = dtype + res._dtype = dtype # type: ignore[has-type] return res else: self._dtype = dtype diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 27140a95e8d..8ece1c4672b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3222,7 +3222,10 @@ def set_index( # label-like if is_scalar(col) or isinstance(col, tuple): if col in self._column_names: - data_to_add.append(self[col]._column) + if drop and inplace: + data_to_add.append(self[col]._column) + else: + data_to_add.append(self[col]._column.copy(deep=True)) names.append(col) if drop: to_drop.append(col) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 017099636aa..27ef00d8376 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1949,7 +1949,7 @@ def copy(self, name: Hashable = None, deep: bool = False) -> Self: New index instance. """ name = self.name if name is None else name - col = self._column.copy(deep=True) if deep else self._column + col = self._column.copy(deep=deep) return type(self)._from_column(col, name=name) @_performance_tracking @@ -5505,7 +5505,13 @@ def _as_index( ) return data.copy(deep=copy) elif isinstance(data, Index): - idx = data.copy(deep=copy).rename(name) + if not isinstance(data, cudf.RangeIndex): + idx = type(data)._from_column( + data._column.copy(deep=copy) if copy else data._column, + name=name, + ) + else: + idx = data.copy(deep=copy).rename(name) elif isinstance(data, ColumnBase): raise ValueError("Use cudf.Index._from_column instead.") elif isinstance(data, (pd.RangeIndex, range)): diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 46d407606df..35a2d3170bb 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -619,8 +619,7 @@ def copy(self, deep: bool = True) -> Self: """ return self._from_data( 
self._data.copy(deep=deep), - # Indexes are immutable so copies can always be shallow. - self.index.copy(deep=False), + self.index.copy(deep=deep), attrs=copy.deepcopy(self.attrs) if deep else self._attrs, ) diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index f9e12437d26..855e333194e 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -6679,10 +6679,6 @@ def pytest_unconfigure(config): "tests/indexes/test_common.py::TestCommon::test_to_frame[uint8-new_name]", "tests/indexes/test_common.py::test_ndarray_compat_properties[multi]", "tests/indexes/test_common.py::test_ndarray_compat_properties[tuples]", - "tests/indexes/test_common.py::test_sort_values_invalid_na_position[nullable_int-None]", - "tests/indexes/test_common.py::test_sort_values_invalid_na_position[nullable_int-middle]", - "tests/indexes/test_common.py::test_sort_values_with_missing[nullable_int-first]", - "tests/indexes/test_common.py::test_sort_values_with_missing[nullable_int-last]", "tests/indexes/test_datetimelike.py::TestDatetimeLike::test_argsort_matches_array[simple_index1]", "tests/indexes/test_datetimelike.py::TestDatetimeLike::test_argsort_matches_array[simple_index2]", "tests/indexes/test_indexing.py::TestGetIndexer::test_get_indexer_base[multi]", diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py index 01841d11647..3a8bd36fd96 100644 --- a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py @@ -12,4 +12,6 @@ def test_rename_shallow_copy(): idx = cudf.Index([1]) result = idx.rename("a") - assert idx._column is result._column + assert idx._column.base_data.get_ptr( + mode="read" + ) == result._column.base_data.get_ptr(mode="read")