Skip to content
22 changes: 3 additions & 19 deletions python/cudf/cudf/core/column/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,29 +124,13 @@ def overlaps(other) -> ColumnBase:
def set_closed(
    self, closed: Literal["left", "right", "both", "neither"]
) -> Self:
    """Return this column with its interval dtype's ``closed`` side replaced.

    The new dtype keeps the current ``self.dtype.subtype`` and uses
    ``closed`` as its closed side; the result is produced by
    ``self._with_type_metadata``.

    Parameters
    ----------
    closed : {"left", "right", "both", "neither"}
        Closed side passed to the new ``IntervalDtype``.

    Returns
    -------
    Self
        Whatever ``_with_type_metadata`` returns for the new dtype.
    """
    # Only the dtype metadata changes here, so delegate to the shared
    # metadata helper rather than constructing an IntervalColumn by hand.
    return self._with_type_metadata(  # type: ignore[return-value]
        IntervalDtype(self.dtype.subtype, closed)
    )

def as_interval_column(self, dtype: IntervalDtype) -> Self:  # type: ignore[override]
    """Return this column re-typed to the given interval ``dtype``.

    Parameters
    ----------
    dtype : IntervalDtype
        Target interval dtype, forwarded to ``self._with_type_metadata``.

    Returns
    -------
    Self
        Whatever ``_with_type_metadata`` returns for ``dtype``.

    Raises
    ------
    ValueError
        If ``dtype`` is not an ``IntervalDtype`` instance.
    """
    # Reject non-interval dtypes up front; the success path is a single
    # delegation to the shared metadata helper.
    if not isinstance(dtype, IntervalDtype):
        raise ValueError("dtype must be IntervalDtype")
    return self._with_type_metadata(dtype)  # type: ignore[return-value]

Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/column/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,9 @@ def _with_type_metadata(self: StructColumn, dtype: Dtype) -> StructColumn:
mask=self.base_mask,
offset=self.offset,
null_count=self.null_count,
children=self.base_children, # type: ignore[arg-type]
children=tuple( # type: ignore[arg-type]
child.astype(dtype.subtype) for child in self.base_children
), # type: ignore[arg-type]
)
elif isinstance(dtype, StructDtype):
return StructColumn(
Expand Down
19 changes: 9 additions & 10 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
deserialize_columns,
serialize_columns,
)
from cudf.core.column.struct import StructColumn
from cudf.core.column_accessor import ColumnAccessor
from cudf.core.common import pipe
from cudf.core.copy_types import GatherMap
Expand Down Expand Up @@ -2543,15 +2542,15 @@ def _cov_or_corr(self, func, method_name):
)
x, y = str(x), str(y)

column_pair_structs[(x, y)] = StructColumn(
data=None,
dtype=StructDtype(
fields={x: self.obj._data[x].dtype, y: self.obj._data[y]}
),
children=(self.obj._data[x], self.obj._data[y]),
size=len(self.obj),
offset=0,
)
struct_column = ColumnBase.from_pylibcudf(
plc.Column.struct_from_children(
[
self.obj._data[x].to_pylibcudf(mode="read"),
self.obj._data[y].to_pylibcudf(mode="read"),
]
)
).set_mask(None)
column_pair_structs[(x, y)] = struct_column

from cudf.core.dataframe import DataFrame

Expand Down
74 changes: 44 additions & 30 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
_maybe_convert_to_default_type,
cudf_dtype_from_pa_type,
cudf_dtype_to_pa_type,
dtype_to_pylibcudf_type,
find_common_type,
is_dtype_obj_numeric,
is_mixed_with_object_dtype,
Expand Down Expand Up @@ -5171,29 +5172,34 @@ def __init__(

if len(data) == 0:
if not hasattr(data, "dtype"):
data = np.array([], dtype=np.int64)
child_type = np.dtype(np.int64)
elif isinstance(data.dtype, (pd.IntervalDtype, IntervalDtype)):
data = np.array([], dtype=data.dtype.subtype)
interval_col = IntervalColumn(
child_type = data.dtype.subtype
else:
child_type = data.dtype
child_plc_type = dtype_to_pylibcudf_type(child_type)
left = plc.column_factories.make_empty_column(child_plc_type)
right = plc.column_factories.make_empty_column(child_plc_type)
plc_column = plc.Column(
plc.DataType(plc.TypeId.STRUCT),
0,
None,
None,
dtype=IntervalDtype(data.dtype, closed),
size=len(data),
children=(as_column(data), as_column(data)),
0,
0,
[left, right],
)
interval_col = ColumnBase.from_pylibcudf(
plc_column
)._with_type_metadata(IntervalDtype(child_type, closed))
else:
col = as_column(data)
if not isinstance(col, IntervalColumn):
raise TypeError("data must be an iterable of Interval data")
if copy:
col = col.copy()
interval_col = IntervalColumn(
data=None,
dtype=IntervalDtype(col.dtype.subtype, closed),
mask=col.mask,
size=col.size,
offset=col.offset,
null_count=col.null_count,
children=col.children, # type: ignore[arg-type]
interval_col = col._with_type_metadata(
IntervalDtype(col.dtype.subtype, closed)
)

if dtype:
Expand Down Expand Up @@ -5263,25 +5269,33 @@ def from_breaks(
breaks = breaks.astype(np.dtype(np.int64))
if copy:
breaks = breaks.copy()
left_col = breaks.slice(0, len(breaks) - 1)
right_col = breaks.slice(1, len(breaks))
left_col = breaks.slice(0, len(breaks) - 1).to_pylibcudf(mode="read")
right_col = (
breaks.slice(1, len(breaks)).copy().to_pylibcudf(mode="read")
)
# For indexing, children should both have 0 offset
right_col = type(right_col)(
data=right_col.data,
dtype=right_col.dtype,
size=right_col.size,
mask=right_col.mask,
offset=0,
null_count=right_col.null_count,
children=right_col.children,
right_col = plc.Column(
right_col.type(),
right_col.size(),
right_col.data(),
right_col.null_mask(),
right_col.null_count(),
0,
right_col.children(),
)

interval_col = IntervalColumn(
data=None,
dtype=IntervalDtype(left_col.dtype, closed),
size=len(left_col),
children=(left_col, right_col),
plc_column = plc.Column(
plc.DataType(plc.TypeId.STRUCT),
left_col.size(),
None,
None,
0,
0,
[left_col, right_col],
)
dtype = IntervalDtype(breaks.dtype, closed)
interval_col = ColumnBase.from_pylibcudf(
plc_column
)._with_type_metadata(dtype)
return IntervalIndex._from_column(interval_col, name=name)

@classmethod
Expand Down