
Commit ab113e9

Remove dead code supporting PyArrow < 9
Signed-off-by: Neil Girdhar <[email protected]>
1 parent a33f592 · commit ab113e9

19 files changed: +65, -244 lines

.buildkite/data.rayci.yml

Lines changed: 0 additions & 24 deletions
@@ -28,30 +28,6 @@ steps:
     wanda: ci/docker/datatfxbsl.build.wanda.yaml
 
   # tests
-  - label: ":database: data: arrow v9 tests"
-    tags:
-      - data
-    instance_type: medium
-    parallelism: 2
-    commands:
-      - bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... //python/ray/air/... data
-        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
-        --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
-        --build-name data9build
-        --except-tags data_integration,doctest,data_non_parallel
-    depends_on: data9build
-
-  - label: ":database: data: arrow v9 tests (data_non_parallel)"
-    tags:
-      - data
-      - data_non_parallel
-    instance_type: medium
-    commands:
-      - bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... //python/ray/air/... data
-        --build-name data9build
-        --only-tags data_non_parallel
-    depends_on: data9build
-
   - label: ":database: data: arrow v19 tests"
     tags:
       - python

python/ray/_private/arrow_utils.py

Lines changed: 0 additions & 4 deletions
@@ -82,10 +82,6 @@ def add_creatable_buckets_param_if_s3_uri(uri: str) -> str:
         URI is an S3 URL; uri will be returned unchanged otherwise.
     """
 
-    pyarrow_version = get_pyarrow_version()
-    if pyarrow_version is not None and pyarrow_version < parse_version("9.0.0"):
-        # This bucket creation query parameter is not required for pyarrow < 9.0.0.
-        return uri
     parsed_uri = urlparse(uri)
     if parsed_uri.scheme == "s3":
         uri = _add_url_query_params(uri, {"allow_bucket_creation": True})
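
With the early return gone, the helper appends the bucket-creation parameter for every S3 URI. A minimal standalone sketch of the resulting behavior (an illustrative reimplementation; Ray's actual helper delegates to its internal _add_url_query_params):

from urllib.parse import urlencode, urlparse


def add_creatable_buckets_param_if_s3_uri(uri: str) -> str:
    # Sketch: with the PyArrow < 9 guard removed, S3 URIs unconditionally
    # receive the allow_bucket_creation query parameter.
    parsed = urlparse(uri)
    if parsed.scheme == "s3":
        separator = "&" if parsed.query else "?"
        return uri + separator + urlencode({"allow_bucket_creation": True})
    return uri


assert (
    add_creatable_buckets_param_if_s3_uri("s3://bucket/path")
    == "s3://bucket/path?allow_bucket_creation=True"
)
assert add_creatable_buckets_param_if_s3_uri("/tmp/local") == "/tmp/local"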

python/ray/air/tests/test_air_usage.py

Lines changed: 0 additions & 5 deletions
@@ -87,11 +87,6 @@ def test_tag_storage_type(storage_path_filesystem_expected, mock_record, monkeyp
 
     storage_path, storage_filesystem, expected = storage_path_filesystem_expected
 
-    if Version(pyarrow.__version__) < Version("9.0.0") and storage_path.startswith(
-        "gs://"
-    ):
-        pytest.skip("GCS support requires pyarrow >= 9.0.0")
-
     storage = StorageContext(
         storage_path=storage_path,
         experiment_dir_name="test",

python/ray/air/tests/test_object_extension.py

Lines changed: 0 additions & 22 deletions
@@ -8,14 +8,10 @@
 from ray.air.util.object_extensions.arrow import (
     ArrowPythonObjectArray,
     ArrowPythonObjectType,
-    _object_extension_type_allowed,
 )
 from ray.air.util.object_extensions.pandas import PythonObjectArray
 
 
-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_object_array_validation():
     # Test unknown input type raises TypeError.
     with pytest.raises(TypeError):
@@ -25,9 +21,6 @@ def test_object_array_validation():
         PythonObjectArray([object(), object()])
 
 
-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_arrow_scalar_object_array_roundtrip():
     arr = np.array(
         ["test", 20, False, {"some": "value"}, None, np.zeros((10, 10))], dtype=object
@@ -41,38 +34,26 @@ def test_arrow_scalar_object_array_roundtrip():
     assert np.all(out[-1] == arr[-1])
 
 
-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_arrow_python_object_array_slice():
     arr = np.array(["test", 20, "test2", 40, "test3", 60], dtype=object)
     ata = ArrowPythonObjectArray.from_objects(arr)
     assert list(ata[1:3].to_pandas()) == [20, "test2"]
     assert ata[2:4].to_pylist() == ["test2", 40]
 
 
-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_arrow_pandas_roundtrip():
     obj = types.SimpleNamespace(a=1, b="test")
     t1 = pa.table({"a": ArrowPythonObjectArray.from_objects([obj, obj]), "b": [0, 1]})
     t2 = pa.Table.from_pandas(t1.to_pandas())
     assert t1.equals(t2)
 
 
-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_pandas_python_object_isna():
     arr = np.array([1, np.nan, 3, 4, 5, np.nan, 7, 8, 9], dtype=object)
     ta = PythonObjectArray(arr)
     np.testing.assert_array_equal(ta.isna(), pd.isna(arr))
 
 
-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_pandas_python_object_take():
     arr = np.array([1, 2, 3, 4, 5], dtype=object)
     ta = PythonObjectArray(arr)
@@ -85,9 +66,6 @@ def test_pandas_python_object_take():
     )
 
 
-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_pandas_python_object_concat():
     arr1 = np.array([1, 2, 3, 4, 5], dtype=object)
     arr2 = np.array([6, 7, 8, 9, 10], dtype=object)

python/ray/air/tests/test_tensor_extension.py

Lines changed: 8 additions & 54 deletions
@@ -517,20 +517,8 @@ def test_arrow_tensor_array_getitem(chunked, restore_data_context, tensor_format
     if chunked:
         t_arr = pa.chunked_array(t_arr)
 
-    pyarrow_version = get_pyarrow_version()
-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(outer_dim):
-            item = t_arr[idx]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(outer_dim):
-            np.testing.assert_array_equal(t_arr[idx], arr[idx])
+    for idx in range(outer_dim):
+        np.testing.assert_array_equal(t_arr[idx], arr[idx])
 
     # Test __iter__.
     for t_subarr, subarr in zip(t_arr, arr):
@@ -554,19 +542,8 @@ def test_arrow_tensor_array_getitem(chunked, restore_data_context, tensor_format
 
     np.testing.assert_array_equal(t_arr2_npy, arr[1:])
 
-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(1, outer_dim):
-            item = t_arr2[idx - 1]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(1, outer_dim):
-            np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])
+    for idx in range(1, outer_dim):
+        np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])
 
 
 @pytest.mark.parametrize("tensor_format", ["v1", "v2"])
@@ -589,20 +566,8 @@ def test_arrow_variable_shaped_tensor_array_getitem(
     if chunked:
         t_arr = pa.chunked_array(t_arr)
 
-    pyarrow_version = get_pyarrow_version()
-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(outer_dim):
-            item = t_arr[idx]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(outer_dim):
-            np.testing.assert_array_equal(t_arr[idx], arr[idx])
+    for idx in range(outer_dim):
+        np.testing.assert_array_equal(t_arr[idx], arr[idx])
 
     # Test __iter__.
     for t_subarr, subarr in zip(t_arr, arr):
@@ -628,19 +593,8 @@ def test_arrow_variable_shaped_tensor_array_getitem(
     for t_subarr, subarr in zip(t_arr2_npy, arr[1:]):
         np.testing.assert_array_equal(t_subarr, subarr)
 
-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(1, outer_dim):
-            item = t_arr2[idx - 1]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(1, outer_dim):
-            np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])
+    for idx in range(1, outer_dim):
+        np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])
 
 
 @pytest.mark.parametrize("tensor_format", ["v1", "v2"])

python/ray/air/util/object_extensions/arrow.py

Lines changed: 1 addition & 12 deletions
@@ -10,17 +10,6 @@
 from ray._private.arrow_utils import get_pyarrow_version
 from ray.util.annotations import PublicAPI
 
-MIN_PYARROW_VERSION_SCALAR_SUBCLASS = parse_version("9.0.0")
-
-PYARROW_VERSION = get_pyarrow_version()
-
-
-def _object_extension_type_allowed() -> bool:
-    return (
-        PYARROW_VERSION is not None
-        and PYARROW_VERSION >= MIN_PYARROW_VERSION_SCALAR_SUBCLASS
-    )
-
 
 # Please see https://arrow.apache.org/docs/python/extending_types.html for more info
 @PublicAPI(stability="alpha")
@@ -89,7 +78,7 @@ class ArrowPythonObjectArray(pa.ExtensionArray):
     """Array class for ArrowPythonObjectType"""
 
     def from_objects(
-        objects: typing.Union[np.ndarray, typing.Iterable[typing.Any]]
+        objects: typing.Union[np.ndarray, typing.Iterable[typing.Any]],
     ) -> "ArrowPythonObjectArray":
         if isinstance(objects, np.ndarray):
             objects = objects.tolist()
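
With _object_extension_type_allowed deleted, callers lean on the PyArrow >= 9 floor instead of a runtime capability check. A short usage sketch mirroring the test_arrow_pandas_roundtrip test above (assumes pyarrow >= 9 is installed):

import types

import pyarrow as pa

from ray.air.util.object_extensions.arrow import ArrowPythonObjectArray

obj = types.SimpleNamespace(a=1, b="test")
# from_objects pickles arbitrary Python objects into the extension array.
arr = ArrowPythonObjectArray.from_objects([obj, obj])
table = pa.table({"a": arr, "b": [0, 1]})
# The extension type survives a pandas roundtrip.
assert table.equals(pa.Table.from_pandas(table.to_pandas()))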

python/ray/air/util/tensor_extensions/arrow.py

Lines changed: 15 additions & 26 deletions
@@ -132,40 +132,29 @@ def convert_to_pyarrow_array(
 
     except ArrowConversionError as ace:
         from ray.data import DataContext
-        from ray.data.extensions.object_extension import (
-            ArrowPythonObjectArray,
-            _object_extension_type_allowed,
-        )
+        from ray.data.extensions.object_extension import ArrowPythonObjectArray
 
         enable_fallback_config: Optional[
             bool
         ] = DataContext.get_current().enable_fallback_to_arrow_object_ext_type
 
-        if not _object_extension_type_allowed():
-            object_ext_type_fallback_allowed = False
+        # NOTE: By default setting is unset which (for compatibility reasons)
+        # is allowing the fallback
+        object_ext_type_fallback_allowed = (
+            enable_fallback_config is None or enable_fallback_config
+        )
+
+        if object_ext_type_fallback_allowed:
             object_ext_type_detail = (
-                "skipping fallback to serialize as pickled python"
-                f" objects (due to unsupported Arrow version {PYARROW_VERSION}, "
-                f"min required version is {MIN_PYARROW_VERSION_SCALAR_SUBCLASS})"
+                "falling back to serialize as pickled python objects"
             )
         else:
-            # NOTE: By default setting is unset which (for compatibility reasons)
-            # is allowing the fallback
-            object_ext_type_fallback_allowed = (
-                enable_fallback_config is None or enable_fallback_config
+            object_ext_type_detail = (
+                "skipping fallback to serialize as pickled python objects "
+                "(due to DataContext.enable_fallback_to_arrow_object_ext_type "
+                "= False)"
             )
 
-            if object_ext_type_fallback_allowed:
-                object_ext_type_detail = (
-                    "falling back to serialize as pickled python objects"
-                )
-            else:
-                object_ext_type_detail = (
-                    "skipping fallback to serialize as pickled python objects "
-                    "(due to DataContext.enable_fallback_to_arrow_object_ext_type "
-                    "= False)"
-                )
-
         # To avoid logging following warning for every block it's
         # only going to be logged in following cases
         # - It's being logged for the first time, and
@@ -293,7 +282,7 @@ def _coerce_np_datetime_to_pa_timestamp_precision(
 
 
 def _infer_pyarrow_type(
-    column_values: Union[List[Any], np.ndarray]
+    column_values: Union[List[Any], np.ndarray],
 ) -> Optional[pa.DataType]:
     """Infers target Pyarrow `DataType` based on the provided
     columnar values.
@@ -375,7 +364,7 @@ def _len_gt_overflow_threshold(obj: Any) -> bool:
 
 
 def _try_infer_pa_timestamp_type(
-    column_values: Union[List[Any], np.ndarray]
+    column_values: Union[List[Any], np.ndarray],
 ) -> Optional[pa.DataType]:
     if isinstance(column_values, list) and len(column_values) > 0:
         # In case provided column values is a list of elements, this
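
After this change the pickle fallback is gated only by DataContext.enable_fallback_to_arrow_object_ext_type; an unset (None) or True value allows it. A hedged sketch of how a pipeline would opt out (the flag name is taken from the diff; the raise-on-failure behavior is inferred from context, not shown in this hunk):

from ray.data import DataContext

ctx = DataContext.get_current()
# None (the default) and True both permit falling back to pickled objects;
# False skips the fallback, so the ArrowConversionError propagates instead.
ctx.enable_fallback_to_arrow_object_ext_type = False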

python/ray/data/_internal/arrow_block.py

Lines changed: 5 additions & 15 deletions
@@ -195,21 +195,11 @@ def _build_tensor_row(
         from packaging.version import parse as parse_version
 
         element = row[col_name][0]
-        # TODO(Clark): Reduce this to np.asarray(element) once we only support Arrow
-        # 9.0.0+.
-        pyarrow_version = get_pyarrow_version()
-        if pyarrow_version is None or pyarrow_version >= parse_version("8.0.0"):
-            assert isinstance(element, pyarrow.ExtensionScalar)
-            if pyarrow_version is None or pyarrow_version >= parse_version("9.0.0"):
-                # For Arrow 9.0.0+, accessing an element in a chunked tensor array
-                # produces an ArrowTensorScalar, which we convert to an ndarray using
-                # .as_py().
-                element = element.as_py()
-            else:
-                # For Arrow 8.*, accessing an element in a chunked tensor array produces
-                # an ExtensionScalar, which we convert to an ndarray using our custom
-                # method.
-                element = element.type._extension_scalar_to_ndarray(element)
+        assert isinstance(element, pyarrow.ExtensionScalar)
+        # For Arrow 9.0.0+, accessing an element in a chunked tensor array
+        # produces an ArrowTensorScalar, which we convert to an ndarray using
+        # .as_py().
+        element = element.as_py()
         # For Arrow < 8.0.0, accessing an element in a chunked tensor array produces an
         # ndarray, which we return directly.
         assert isinstance(element, np.ndarray), type(element)
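
The surviving branch relies on the PyArrow >= 9 behavior the comment describes: indexing a chunked tensor-extension array yields an ExtensionScalar whose .as_py() returns an ndarray. A hedged sketch (ArrowTensorArray.from_numpy is the constructor used in Ray's tensor extension tests; assumes pyarrow >= 9):

import numpy as np
import pyarrow as pa

from ray.air.util.tensor_extensions.arrow import ArrowTensorArray

tensor = np.arange(8).reshape(2, 2, 2)
t_arr = pa.chunked_array([ArrowTensorArray.from_numpy(tensor)])
element = t_arr[0]
assert isinstance(element, pa.ExtensionScalar)
# On pyarrow >= 9 the ArrowTensorScalar converts straight to an ndarray.
np.testing.assert_array_equal(element.as_py(), tensor[0])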

python/ray/data/extensions/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -8,7 +8,6 @@
     ArrowPythonObjectType,
     PythonObjectArray,
     PythonObjectDtype,
-    _object_extension_type_allowed,
 )
 from ray.data.extensions.tensor_extension import (
     ArrowConversionError,
@@ -40,6 +39,5 @@
     "ArrowPythonObjectScalar",
     "PythonObjectArray",
     "PythonObjectDtype",
-    "_object_extension_type_allowed",
     "get_arrow_extension_tensor_types",
 ]

python/ray/data/extensions/object_extension.py

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
     ArrowPythonObjectArray,
     ArrowPythonObjectScalar,
     ArrowPythonObjectType,
-    _object_extension_type_allowed,
 )
 from ray.air.util.object_extensions.pandas import (  # noqa: F401
     PythonObjectArray,
