Skip to content
Merged
6 changes: 5 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,8 @@ def __new__(

elif is_ea_or_datetimelike_dtype(dtype):
# non-EA dtype indexes have special casting logic, so we punt here
pass
if isinstance(data, (set, frozenset)):
data = list(data)
Comment on lines 506 to +508
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is related to what you mentioned on slack, I assume?

idx = pd.Index({1, 2}, dtype="Int64")
idx = pd.Index({1, 2}, dtype="int64")

Whether we consider a set as sequence as valid input for constructors or not (IMO it would also be fine to not do that more generally)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, correct. It#s explicitly allowed for Index, so we shouldn't break this now

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, for Series(..) we disallow it explicitly, but for Index(..) it currently works for non-EA dtypes. Sounds good to allow that for now.


elif is_ea_or_datetimelike_dtype(data_dtype):
pass
Expand Down Expand Up @@ -6877,6 +6878,9 @@ def insert(self, loc: int, item) -> Index:
# We cannot keep the same dtype, so cast to the (often object)
# minimal shared dtype before doing the insert.
dtype = self._find_common_type_compat(item)
if dtype == self.dtype:
# EA's might run into recursion errors if loc is invalid
raise
return self.astype(dtype).insert(loc, item)

if arr.dtype != object or not isinstance(
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/indexes/base_class/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
Index,
Expand Down Expand Up @@ -233,7 +231,6 @@ def test_tuple_union_bug(self, method, expected, sort):
expected = Index(expected)
tm.assert_index_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("first_list", [["b", "a"], []])
@pytest.mark.parametrize("second_list", [["a", "b"], []])
@pytest.mark.parametrize(
Expand All @@ -243,6 +240,7 @@ def test_tuple_union_bug(self, method, expected, sort):
def test_union_name_preservation(
self, first_list, second_list, first_name, second_name, expected_name, sort
):
expected_dtype = object if not first_list or not second_list else "str"
first = Index(first_list, name=first_name)
second = Index(second_list, name=second_name)
union = first.union(second, sort=sort)
Expand All @@ -253,7 +251,7 @@ def test_union_name_preservation(
expected = Index(sorted(vals), name=expected_name)
tm.assert_index_equal(union, expected)
else:
expected = Index(vals, name=expected_name)
expected = Index(vals, name=expected_name, dtype=expected_dtype)
tm.assert_index_equal(union.sort_values(), expected.sort_values())

@pytest.mark.parametrize(
Expand Down
12 changes: 2 additions & 10 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,13 @@ def test_constructor_casting(self, index):
tm.assert_contains_all(arr, new_index)
tm.assert_index_equal(index, new_index)

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_constructor_copy(self, using_infer_string):
index = Index(list("abc"), name="name")
arr = np.array(index)
new_index = Index(arr, copy=True, name="name")
assert isinstance(new_index, Index)
assert new_index.name == "name"
if using_infer_string:
if using_infer_string and HAS_PYARROW:
tm.assert_extension_array_equal(
new_index.values, pd.array(arr, dtype="str")
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one I don't understand. Why is index.values a numpy array in case of "python" storage?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I thought I rolled that one back, the change was just something I tried.

Updated now

Expand Down Expand Up @@ -343,11 +340,6 @@ def test_constructor_empty_special(self, empty, klass):
def test_view_with_args(self, index):
index.view("i8")

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW,
reason="TODO(infer_string)",
strict=False,
)
@pytest.mark.parametrize(
"index",
[
Expand All @@ -364,7 +356,7 @@ def test_view_with_args_object_array_raises(self, index):
msg = "When changing to a larger dtype"
with pytest.raises(ValueError, match=msg):
index.view("i8")
elif index.dtype == "string":
elif index.dtype == "str" and not index.dtype.storage == "python":
with pytest.raises(NotImplementedError, match="i8"):
index.view("i8")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just testing a different error, right? (just to understand why this has a separate branch)

(ideally we should also make it a TypeError, I assume, maybe can add a TODO note)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added a TODO

else:
Expand Down
31 changes: 11 additions & 20 deletions pandas/tests/indexes/test_old_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs.tslibs import Timestamp
from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.common import (
is_integer_dtype,
Expand All @@ -28,6 +25,7 @@
PeriodIndex,
RangeIndex,
Series,
StringDtype,
TimedeltaIndex,
isna,
period_range,
Expand Down Expand Up @@ -229,7 +227,6 @@ def test_logical_compat(self, simple_index):
with pytest.raises(TypeError, match=msg):
idx.any()

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_repr_roundtrip(self, simple_index):
if isinstance(simple_index, IntervalIndex):
pytest.skip(f"Not a valid repr for {type(simple_index).__name__}")
Expand All @@ -246,11 +243,6 @@ def test_repr_max_seq_item_setting(self, simple_index):
repr(idx)
assert "..." not in str(idx)

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW,
reason="TODO(infer_string)",
strict=False,
)
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_ensure_copied_data(self, index):
# Check the "copy" argument of each Index.__new__ is honoured
Expand Down Expand Up @@ -296,7 +288,9 @@ def test_ensure_copied_data(self, index):
tm.assert_numpy_array_equal(
index._values._mask, result._values._mask, check_same="same"
)
elif index.dtype == "string[python]":
elif (
isinstance(index.dtype, StringDtype) and index.dtype.storage == "python"
):
assert np.shares_memory(index._values._ndarray, result._values._ndarray)
tm.assert_numpy_array_equal(
index._values._ndarray, result._values._ndarray, check_same="same"
Expand Down Expand Up @@ -444,11 +438,7 @@ def test_insert_base(self, index):
result = trimmed.insert(0, index[0])
assert index[0:4].equals(result)

@pytest.mark.skipif(
using_string_dtype(),
reason="completely different behavior, tested elsewher",
)
def test_insert_out_of_bounds(self, index):
def test_insert_out_of_bounds(self, index, using_infer_string):
# TypeError/IndexError matches what np.insert raises in these cases

if len(index) > 0:
Expand All @@ -460,6 +450,10 @@ def test_insert_out_of_bounds(self, index):
msg = "index (0|0.5) is out of bounds for axis 0 with size 0"
else:
msg = "slice indices must be integers or None or have an __index__ method"

if using_infer_string and (index.dtype == "str" or index.dtype == "category"): # noqa: PLR1714
msg = "loc must be an integer between"

with pytest.raises(err, match=msg):
index.insert(0.5, "foo")

Expand Down Expand Up @@ -836,7 +830,6 @@ def test_append_preserves_dtype(self, simple_index):
alt = index.take(list(range(N)) * 2)
tm.assert_index_equal(result, alt, check_exact=True)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_inv(self, simple_index, using_infer_string):
idx = simple_index

Expand All @@ -853,10 +846,8 @@ def test_inv(self, simple_index, using_infer_string):
err = TypeError
msg = "ufunc 'invert' not supported for the input types"
elif using_infer_string and idx.dtype == "string":
import pyarrow as pa

err = pa.lib.ArrowNotImplementedError
msg = "has no kernel"
err = TypeError
msg = "not supported for string dtypes"
else:
err = TypeError
msg = "bad operand"
Expand Down
Loading