diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index b6ef7089a9f..9c544a97caa 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -124,29 +124,13 @@ def overlaps(other) -> ColumnBase: def set_closed( self, closed: Literal["left", "right", "both", "neither"] ) -> Self: - return IntervalColumn( # type: ignore[return-value] - data=None, - size=self.size, - dtype=IntervalDtype(self.dtype.subtype, closed), - mask=self.base_mask, - offset=self.offset, - null_count=self.null_count, - children=self.base_children, # type: ignore[arg-type] + return self._with_type_metadata( # type: ignore[return-value] + IntervalDtype(self.dtype.subtype, closed) ) def as_interval_column(self, dtype: IntervalDtype) -> Self: # type: ignore[override] if isinstance(dtype, IntervalDtype): - return IntervalColumn( # type: ignore[return-value] - data=None, - size=self.size, - dtype=dtype, - mask=self.mask, - offset=self.offset, - null_count=self.null_count, - children=tuple( # type: ignore[arg-type] - child.astype(dtype.subtype) for child in self.children - ), - ) + return self._with_type_metadata(dtype) # type: ignore[return-value] else: raise ValueError("dtype must be IntervalDtype") diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index a7ffed6e0bf..f24342913e8 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -220,7 +220,9 @@ def _with_type_metadata(self: StructColumn, dtype: Dtype) -> StructColumn: mask=self.base_mask, offset=self.offset, null_count=self.null_count, - children=self.base_children, # type: ignore[arg-type] + children=tuple( # type: ignore[arg-type] + child.astype(dtype.subtype) for child in self.base_children + ), # type: ignore[arg-type] ) elif isinstance(dtype, StructDtype): return StructColumn( diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 177eacb0276..5215928377d 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -31,7 +31,6 @@ deserialize_columns, serialize_columns, ) -from cudf.core.column.struct import StructColumn from cudf.core.column_accessor import ColumnAccessor from cudf.core.common import pipe from cudf.core.copy_types import GatherMap @@ -2543,15 +2542,15 @@ def _cov_or_corr(self, func, method_name): ) x, y = str(x), str(y) - column_pair_structs[(x, y)] = StructColumn( - data=None, - dtype=StructDtype( - fields={x: self.obj._data[x].dtype, y: self.obj._data[y]} - ), - children=(self.obj._data[x], self.obj._data[y]), - size=len(self.obj), - offset=0, - ) + struct_column = ColumnBase.from_pylibcudf( + plc.Column.struct_from_children( + [ + self.obj._data[x].to_pylibcudf(mode="read"), + self.obj._data[y].to_pylibcudf(mode="read"), + ] + ) + ).set_mask(None) + column_pair_structs[(x, y)] = struct_column from cudf.core.dataframe import DataFrame diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 017099636aa..893595e122f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -55,6 +55,7 @@ _maybe_convert_to_default_type, cudf_dtype_from_pa_type, cudf_dtype_to_pa_type, + dtype_to_pylibcudf_type, find_common_type, is_dtype_obj_numeric, is_mixed_with_object_dtype, @@ -5171,29 +5172,34 @@ def __init__( if len(data) == 0: if not hasattr(data, "dtype"): - data = np.array([], dtype=np.int64) + child_type = np.dtype(np.int64) elif isinstance(data.dtype, (pd.IntervalDtype, IntervalDtype)): - data = np.array([], dtype=data.dtype.subtype) - interval_col = IntervalColumn( + child_type = data.dtype.subtype + else: + child_type = data.dtype + child_plc_type = dtype_to_pylibcudf_type(child_type) + left = plc.column_factories.make_empty_column(child_plc_type) + right = plc.column_factories.make_empty_column(child_plc_type) + plc_column = plc.Column( + plc.DataType(plc.TypeId.STRUCT), + 0, + None, None, - dtype=IntervalDtype(data.dtype, closed), - size=len(data), - children=(as_column(data), as_column(data)), + 0, + 0, + [left, right], ) + interval_col = ColumnBase.from_pylibcudf( + plc_column + )._with_type_metadata(IntervalDtype(child_type, closed)) else: col = as_column(data) if not isinstance(col, IntervalColumn): raise TypeError("data must be an iterable of Interval data") if copy: col = col.copy() - interval_col = IntervalColumn( - data=None, - dtype=IntervalDtype(col.dtype.subtype, closed), - mask=col.mask, - size=col.size, - offset=col.offset, - null_count=col.null_count, - children=col.children, # type: ignore[arg-type] + interval_col = col._with_type_metadata( + IntervalDtype(col.dtype.subtype, closed) ) if dtype: @@ -5263,25 +5269,33 @@ def from_breaks( breaks = breaks.astype(np.dtype(np.int64)) if copy: breaks = breaks.copy() - left_col = breaks.slice(0, len(breaks) - 1) - right_col = breaks.slice(1, len(breaks)) + left_col = breaks.slice(0, len(breaks) - 1).to_pylibcudf(mode="read") + right_col = ( + breaks.slice(1, len(breaks)).copy().to_pylibcudf(mode="read") + ) # For indexing, children should both have 0 offset - right_col = type(right_col)( - data=right_col.data, - dtype=right_col.dtype, - size=right_col.size, - mask=right_col.mask, - offset=0, - null_count=right_col.null_count, - children=right_col.children, + right_col = plc.Column( + right_col.type(), + right_col.size(), + right_col.data(), + right_col.null_mask(), + right_col.null_count(), + 0, + right_col.children(), ) - - interval_col = IntervalColumn( - data=None, - dtype=IntervalDtype(left_col.dtype, closed), - size=len(left_col), - children=(left_col, right_col), + plc_column = plc.Column( + plc.DataType(plc.TypeId.STRUCT), + left_col.size(), + None, + None, + 0, + 0, + [left_col, right_col], ) + dtype = IntervalDtype(breaks.dtype, closed) + interval_col = ColumnBase.from_pylibcudf( + plc_column + )._with_type_metadata(dtype) return IntervalIndex._from_column(interval_col, name=name) @classmethod