diff --git a/.buildkite/data.rayci.yml b/.buildkite/data.rayci.yml
index ec9469e834b0..ff1137139659 100644
--- a/.buildkite/data.rayci.yml
+++ b/.buildkite/data.rayci.yml
@@ -28,30 +28,6 @@ steps:
     wanda: ci/docker/datatfxbsl.build.wanda.yaml

   # tests
-  - label: ":database: data: arrow v9 tests"
-    tags:
-      - data
-    instance_type: medium
-    parallelism: 2
-    commands:
-      - bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... //python/ray/air/... data
-        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}"
-        --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
-        --build-name data9build
-        --except-tags data_integration,doctest,data_non_parallel
-    depends_on: data9build
-
-  - label: ":database: data: arrow v9 tests (data_non_parallel)"
-    tags:
-      - data
-      - data_non_parallel
-    instance_type: medium
-    commands:
-      - bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... //python/ray/air/... data
-        --build-name data9build
-        --only-tags data_non_parallel
-    depends_on: data9build
-
   - label: ":database: data: arrow v19 tests"
     tags:
       - python
diff --git a/python/ray/_private/arrow_utils.py b/python/ray/_private/arrow_utils.py
index 0c75ef6c4fa7..cd6fb8a07363 100644
--- a/python/ray/_private/arrow_utils.py
+++ b/python/ray/_private/arrow_utils.py
@@ -82,10 +82,6 @@ def add_creatable_buckets_param_if_s3_uri(uri: str) -> str:
         URI is an S3 URL; uri will be returned unchanged otherwise.
     """
-    pyarrow_version = get_pyarrow_version()
-    if pyarrow_version is not None and pyarrow_version < parse_version("9.0.0"):
-        # This bucket creation query parameter is not required for pyarrow < 9.0.0.
-        return uri
     parsed_uri = urlparse(uri)
     if parsed_uri.scheme == "s3":
         uri = _add_url_query_params(uri, {"allow_bucket_creation": True})
diff --git a/python/ray/air/tests/test_air_usage.py b/python/ray/air/tests/test_air_usage.py
index a2e14f3607c2..aca2830a0271 100644
--- a/python/ray/air/tests/test_air_usage.py
+++ b/python/ray/air/tests/test_air_usage.py
@@ -87,11 +87,6 @@ def test_tag_storage_type(storage_path_filesystem_expected, mock_record, monkeyp

     storage_path, storage_filesystem, expected = storage_path_filesystem_expected

-    if Version(pyarrow.__version__) < Version("9.0.0") and storage_path.startswith(
-        "gs://"
-    ):
-        pytest.skip("GCS support requires pyarrow >= 9.0.0")
-
     storage = StorageContext(
         storage_path=storage_path,
         experiment_dir_name="test",
diff --git a/python/ray/air/tests/test_object_extension.py b/python/ray/air/tests/test_object_extension.py
index b95f4c44a958..a8d439790be2 100644
--- a/python/ray/air/tests/test_object_extension.py
+++ b/python/ray/air/tests/test_object_extension.py
@@ -8,14 +8,10 @@
 from ray.air.util.object_extensions.arrow import (
     ArrowPythonObjectArray,
     ArrowPythonObjectType,
-    _object_extension_type_allowed,
 )
 from ray.air.util.object_extensions.pandas import PythonObjectArray


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_object_array_validation():
     # Test unknown input type raises TypeError.
     with pytest.raises(TypeError):
@@ -25,9 +21,6 @@ def test_object_array_validation():
         PythonObjectArray([object(), object()])


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_arrow_scalar_object_array_roundtrip():
     arr = np.array(
         ["test", 20, False, {"some": "value"}, None, np.zeros((10, 10))], dtype=object
@@ -41,9 +34,6 @@ def test_arrow_scalar_object_array_roundtrip():
     assert np.all(out[-1] == arr[-1])


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_arrow_python_object_array_slice():
     arr = np.array(["test", 20, "test2", 40, "test3", 60], dtype=object)
     ata = ArrowPythonObjectArray.from_objects(arr)
@@ -51,9 +41,6 @@ def test_arrow_python_object_array_slice():
     assert ata[2:4].to_pylist() == ["test2", 40]


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_arrow_pandas_roundtrip():
     obj = types.SimpleNamespace(a=1, b="test")
     t1 = pa.table({"a": ArrowPythonObjectArray.from_objects([obj, obj]), "b": [0, 1]})
@@ -61,18 +48,12 @@ def test_arrow_pandas_roundtrip():
     assert t1.equals(t2)


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_pandas_python_object_isna():
     arr = np.array([1, np.nan, 3, 4, 5, np.nan, 7, 8, 9], dtype=object)
     ta = PythonObjectArray(arr)
     np.testing.assert_array_equal(ta.isna(), pd.isna(arr))


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_pandas_python_object_take():
     arr = np.array([1, 2, 3, 4, 5], dtype=object)
     ta = PythonObjectArray(arr)
@@ -85,9 +66,6 @@ def test_pandas_python_object_take():
     )


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_pandas_python_object_concat():
     arr1 = np.array([1, 2, 3, 4, 5], dtype=object)
     arr2 = np.array([6, 7, 8, 9, 10], dtype=object)
diff --git a/python/ray/air/tests/test_tensor_extension.py b/python/ray/air/tests/test_tensor_extension.py
index fb5b6bbd43ab..d5f0767f611a 100644
--- a/python/ray/air/tests/test_tensor_extension.py
+++ b/python/ray/air/tests/test_tensor_extension.py
@@ -517,20 +517,8 @@ def test_arrow_tensor_array_getitem(chunked, restore_data_context, tensor_format
     if chunked:
         t_arr = pa.chunked_array(t_arr)

-    pyarrow_version = get_pyarrow_version()
-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(outer_dim):
-            item = t_arr[idx]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(outer_dim):
-            np.testing.assert_array_equal(t_arr[idx], arr[idx])
+    for idx in range(outer_dim):
+        np.testing.assert_array_equal(t_arr[idx], arr[idx])

     # Test __iter__.
     for t_subarr, subarr in zip(t_arr, arr):
@@ -554,19 +542,8 @@ def test_arrow_tensor_array_getitem(chunked, restore_data_context, tensor_format

     np.testing.assert_array_equal(t_arr2_npy, arr[1:])

-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(1, outer_dim):
-            item = t_arr2[idx - 1]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(1, outer_dim):
-            np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])
+    for idx in range(1, outer_dim):
+        np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])


 @pytest.mark.parametrize("tensor_format", ["v1", "v2"])
@@ -589,20 +566,8 @@ def test_arrow_variable_shaped_tensor_array_getitem(
     if chunked:
         t_arr = pa.chunked_array(t_arr)

-    pyarrow_version = get_pyarrow_version()
-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(outer_dim):
-            item = t_arr[idx]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(outer_dim):
-            np.testing.assert_array_equal(t_arr[idx], arr[idx])
+    for idx in range(outer_dim):
+        np.testing.assert_array_equal(t_arr[idx], arr[idx])

     # Test __iter__.
     for t_subarr, subarr in zip(t_arr, arr):
@@ -628,19 +593,8 @@ def test_arrow_variable_shaped_tensor_array_getitem(
     for t_subarr, subarr in zip(t_arr2_npy, arr[1:]):
         np.testing.assert_array_equal(t_subarr, subarr)

-    if (
-        chunked
-        and pyarrow_version >= parse_version("8.0.0")
-        and pyarrow_version < parse_version("9.0.0")
-    ):
-        for idx in range(1, outer_dim):
-            item = t_arr2[idx - 1]
-            assert isinstance(item, pa.ExtensionScalar)
-            item = item.type._extension_scalar_to_ndarray(item)
-            np.testing.assert_array_equal(item, arr[idx])
-    else:
-        for idx in range(1, outer_dim):
-            np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])
+    for idx in range(1, outer_dim):
+        np.testing.assert_array_equal(t_arr2[idx - 1], arr[idx])


 @pytest.mark.parametrize("tensor_format", ["v1", "v2"])
diff --git a/python/ray/air/util/object_extensions/arrow.py b/python/ray/air/util/object_extensions/arrow.py
index 180fcfc96367..fc6ae16d9f54 100644
--- a/python/ray/air/util/object_extensions/arrow.py
+++ b/python/ray/air/util/object_extensions/arrow.py
@@ -10,17 +10,6 @@
 from ray._private.arrow_utils import get_pyarrow_version
 from ray.util.annotations import PublicAPI

-MIN_PYARROW_VERSION_SCALAR_SUBCLASS = parse_version("9.0.0")
-
-PYARROW_VERSION = get_pyarrow_version()
-
-
-def _object_extension_type_allowed() -> bool:
-    return (
-        PYARROW_VERSION is not None
-        and PYARROW_VERSION >= MIN_PYARROW_VERSION_SCALAR_SUBCLASS
-    )
-

 # Please see https://arrow.apache.org/docs/python/extending_types.html for more info
 @PublicAPI(stability="alpha")
@@ -89,7 +78,7 @@ class ArrowPythonObjectArray(pa.ExtensionArray):
     """Array class for ArrowPythonObjectType"""

     def from_objects(
-        objects: typing.Union[np.ndarray, typing.Iterable[typing.Any]]
+        objects: typing.Union[np.ndarray, typing.Iterable[typing.Any]],
     ) -> "ArrowPythonObjectArray":
         if isinstance(objects, np.ndarray):
             objects = objects.tolist()
diff --git a/python/ray/air/util/tensor_extensions/arrow.py b/python/ray/air/util/tensor_extensions/arrow.py
index 146968364424..06afde890bf4 100644
--- a/python/ray/air/util/tensor_extensions/arrow.py
+++ b/python/ray/air/util/tensor_extensions/arrow.py
@@ -132,40 +132,29 @@ def convert_to_pyarrow_array(
     except ArrowConversionError as ace:
         from ray.data import DataContext
-        from ray.data.extensions.object_extension import (
-            ArrowPythonObjectArray,
-            _object_extension_type_allowed,
-        )
+        from ray.data.extensions.object_extension import ArrowPythonObjectArray

         enable_fallback_config: Optional[
             bool
         ] = DataContext.get_current().enable_fallback_to_arrow_object_ext_type

-        if not _object_extension_type_allowed():
-            object_ext_type_fallback_allowed = False
+        # NOTE: By default setting is unset which (for compatibility reasons)
+        #       is allowing the fallback
+        object_ext_type_fallback_allowed = (
+            enable_fallback_config is None or enable_fallback_config
+        )
+
+        if object_ext_type_fallback_allowed:
             object_ext_type_detail = (
-                "skipping fallback to serialize as pickled python"
-                f" objects (due to unsupported Arrow version {PYARROW_VERSION}, "
-                f"min required version is {MIN_PYARROW_VERSION_SCALAR_SUBCLASS})"
+                "falling back to serialize as pickled python objects"
             )
         else:
-            # NOTE: By default setting is unset which (for compatibility reasons)
-            #       is allowing the fallback
-            object_ext_type_fallback_allowed = (
-                enable_fallback_config is None or enable_fallback_config
+            object_ext_type_detail = (
+                "skipping fallback to serialize as pickled python objects "
+                "(due to DataContext.enable_fallback_to_arrow_object_ext_type "
+                "= False)"
             )
-            if object_ext_type_fallback_allowed:
-                object_ext_type_detail = (
-                    "falling back to serialize as pickled python objects"
-                )
-            else:
-                object_ext_type_detail = (
-                    "skipping fallback to serialize as pickled python objects "
-                    "(due to DataContext.enable_fallback_to_arrow_object_ext_type "
-                    "= False)"
-                )
-
         # To avoid logging following warning for every block it's
         # only going to be logged in following cases
         #   - It's being logged for the first time, and
@@ -293,7 +282,7 @@ def _coerce_np_datetime_to_pa_timestamp_precision(


 def _infer_pyarrow_type(
-    column_values: Union[List[Any], np.ndarray]
+    column_values: Union[List[Any], np.ndarray],
 ) -> Optional[pa.DataType]:
     """Infers target Pyarrow `DataType` based on the provided columnar values.

@@ -375,7 +364,7 @@ def _len_gt_overflow_threshold(obj: Any) -> bool:


 def _try_infer_pa_timestamp_type(
-    column_values: Union[List[Any], np.ndarray]
+    column_values: Union[List[Any], np.ndarray],
 ) -> Optional[pa.DataType]:
     if isinstance(column_values, list) and len(column_values) > 0:
         # In case provided column values is a list of elements, this
diff --git a/python/ray/data/_internal/arrow_block.py b/python/ray/data/_internal/arrow_block.py
index d954e7146926..4145c04ee77d 100644
--- a/python/ray/data/_internal/arrow_block.py
+++ b/python/ray/data/_internal/arrow_block.py
@@ -195,21 +195,11 @@ def _build_tensor_row(
     from packaging.version import parse as parse_version

     element = row[col_name][0]
-    # TODO(Clark): Reduce this to np.asarray(element) once we only support Arrow
-    # 9.0.0+.
-    pyarrow_version = get_pyarrow_version()
-    if pyarrow_version is None or pyarrow_version >= parse_version("8.0.0"):
-        assert isinstance(element, pyarrow.ExtensionScalar)
-        if pyarrow_version is None or pyarrow_version >= parse_version("9.0.0"):
-            # For Arrow 9.0.0+, accessing an element in a chunked tensor array
-            # produces an ArrowTensorScalar, which we convert to an ndarray using
-            # .as_py().
-            element = element.as_py()
-        else:
-            # For Arrow 8.*, accessing an element in a chunked tensor array produces
-            # an ExtensionScalar, which we convert to an ndarray using our custom
-            # method.
-            element = element.type._extension_scalar_to_ndarray(element)
+    assert isinstance(element, pyarrow.ExtensionScalar)
+    # For Arrow 9.0.0+, accessing an element in a chunked tensor array
+    # produces an ArrowTensorScalar, which we convert to an ndarray using
+    # .as_py().
+    element = element.as_py()
     # For Arrow < 8.0.0, accessing an element in a chunked tensor array produces an
     # ndarray, which we return directly.
     assert isinstance(element, np.ndarray), type(element)
diff --git a/python/ray/data/extensions/__init__.py b/python/ray/data/extensions/__init__.py
index 517b4fe7a3a2..0e8954988e1d 100644
--- a/python/ray/data/extensions/__init__.py
+++ b/python/ray/data/extensions/__init__.py
@@ -8,7 +8,6 @@
     ArrowPythonObjectType,
     PythonObjectArray,
     PythonObjectDtype,
-    _object_extension_type_allowed,
 )
 from ray.data.extensions.tensor_extension import (
     ArrowConversionError,
@@ -40,6 +39,5 @@
     "ArrowPythonObjectScalar",
     "PythonObjectArray",
     "PythonObjectDtype",
-    "_object_extension_type_allowed",
     "get_arrow_extension_tensor_types",
 ]
diff --git a/python/ray/data/extensions/object_extension.py b/python/ray/data/extensions/object_extension.py
index 42ab20a231c6..cd97bb398c34 100644
--- a/python/ray/data/extensions/object_extension.py
+++ b/python/ray/data/extensions/object_extension.py
@@ -2,7 +2,6 @@
     ArrowPythonObjectArray,
     ArrowPythonObjectScalar,
     ArrowPythonObjectType,
-    _object_extension_type_allowed,
 )
 from ray.air.util.object_extensions.pandas import (  # noqa: F401
     PythonObjectArray,
diff --git a/python/ray/data/tests/conftest.py b/python/ray/data/tests/conftest.py
index 905c3e210435..1d0baceb25df 100644
--- a/python/ray/data/tests/conftest.py
+++ b/python/ray/data/tests/conftest.py
@@ -127,9 +127,8 @@ def _s3_fs(aws_credentials, s3_server, s3_path):

     kwargs = aws_credentials.copy()

-    if get_pyarrow_version() >= parse_version("9.0.0"):
-        kwargs["allow_bucket_creation"] = True
-        kwargs["allow_bucket_deletion"] = True
+    kwargs["allow_bucket_creation"] = True
+    kwargs["allow_bucket_deletion"] = True

     fs = pa.fs.S3FileSystem(
         region="us-west-2",
diff --git a/python/ray/data/tests/test_all_to_all.py b/python/ray/data/tests/test_all_to_all.py
index c82d5dc21457..407d958c04fc 100644
--- a/python/ray/data/tests/test_all_to_all.py
+++ b/python/ray/data/tests/test_all_to_all.py
@@ -1881,9 +1881,6 @@ def func(df, a, b, c):
     assert sorted([x["value"] for x in ds.take()]) == [6, 8, 10, 12]


-_NEED_UNWRAP_ARROW_SCALAR = get_pyarrow_version() <= parse_version("9.0.0")
-
-
 @pytest.mark.parametrize("num_parts", [1, 30])
 @pytest.mark.parametrize("ds_format", ["pyarrow", "pandas", "numpy"])
 def test_groupby_map_groups_multicolumn(
@@ -1902,8 +1899,6 @@ def test_groupby_map_groups_multicolumn(
         num_parts
     )

-    should_unwrap_pa_scalars = ds_format == "pyarrow" and _NEED_UNWRAP_ARROW_SCALAR
-
     def _map_group(df):
         # NOTE: Since we're grouping by A and B, these columns will be bearing
         # the same values.
@@ -1911,8 +1906,8 @@ def _map_group(df):
         b = df["B"][0]

         return {
             # NOTE: PA 9.0 requires explicit unwrapping into Python objects
-            "A": [a.as_py() if should_unwrap_pa_scalars else a],
-            "B": [b.as_py() if should_unwrap_pa_scalars else b],
+            "A": [a],
+            "B": [b],
             "count": [len(df["A"])],
         }
@@ -1956,8 +1951,6 @@ def test_groupby_map_groups_multicolumn_with_nan(
         ]
     ).repartition(num_parts)

-    should_unwrap_pa_scalars = ds_format == "pyarrow" and _NEED_UNWRAP_ARROW_SCALAR
-
     def _map_group(df):
         # NOTE: Since we're grouping by A and B, these columns will be bearing
         # the same values
@@ -1965,8 +1958,8 @@ def _map_group(df):
         b = df["B"][0]

         return {
             # NOTE: PA 9.0 requires explicit unwrapping into Python objects
-            "A": [a.as_py() if should_unwrap_pa_scalars else a],
-            "B": [b.as_py() if should_unwrap_pa_scalars else b],
+            "A": [a],
+            "B": [b],
             "count": [len(df["A"])],
         }
diff --git a/python/ray/data/tests/test_arrow_block.py b/python/ray/data/tests/test_arrow_block.py
index 5ca8009910cc..787941e8b9a3 100644
--- a/python/ray/data/tests/test_arrow_block.py
+++ b/python/ray/data/tests/test_arrow_block.py
@@ -25,7 +25,6 @@
 from ray.data._internal.arrow_ops.transform_pyarrow import combine_chunked_array
 from ray.data._internal.util import GiB, MiB
 from ray.data.block import BlockAccessor
-from ray.data.extensions.object_extension import _object_extension_type_allowed


 def simple_array():
@@ -460,9 +459,6 @@ def test_register_arrow_types(tmp_path):
     run_string_as_driver(driver_script)


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension type not supported."
-)
 def test_dict_doesnt_fallback_to_pandas_block(ray_start_regular_shared):
     # If the UDF returns a column with dict, previously, we would
     # fall back to pandas, because we couldn't convert it to
diff --git a/python/ray/data/tests/test_arrow_serialization.py b/python/ray/data/tests/test_arrow_serialization.py
index eb1da5d93dca..4f95ea00d124 100644
--- a/python/ray/data/tests/test_arrow_serialization.py
+++ b/python/ray/data/tests/test_arrow_serialization.py
@@ -26,7 +26,6 @@
 from ray._private.arrow_utils import get_pyarrow_version
 from ray.data.extensions.object_extension import (
     ArrowPythonObjectArray,
-    _object_extension_type_allowed,
 )
 from ray.data.extensions.tensor_extension import (
     ArrowTensorArray,
@@ -415,14 +414,10 @@ def pickled_objects_array():
     (lazy_fixture("list_of_empty_struct_array"), 0.1),
     # Complex nested array
     (lazy_fixture("complex_nested_array"), 0.1),
+    # Array of pickled objects
+    (lazy_fixture("pickled_objects_array"), 0.1),
 ]

-if _object_extension_type_allowed():
-    pytest_custom_serialization_arrays.append(
-        # Array of pickled objects
-        (lazy_fixture("pickled_objects_array"), 0.1),
-    )
-

 @pytest.mark.parametrize("data,cap_mult", pytest_custom_serialization_arrays)
 def test_custom_arrow_data_serializer(ray_start_regular_shared, data, cap_mult):
@@ -543,9 +538,6 @@ def fn(batch: list):
     assert res == [{"id": 1}], res


-@pytest.mark.skipif(
-    not _object_extension_type_allowed(), reason="Object extension not supported."
-)
 def test_arrow_object_and_array_support(ray_start_regular_shared):
     obj = types.SimpleNamespace(some_attribute="test")

diff --git a/python/ray/data/tests/test_pandas_block.py b/python/ray/data/tests/test_pandas_block.py
index 26b2be7e1e75..0bee6108baa7 100644
--- a/python/ray/data/tests/test_pandas_block.py
+++ b/python/ray/data/tests/test_pandas_block.py
@@ -15,7 +15,6 @@
     PandasBlockColumnAccessor,
 )
 from ray.data._internal.util import is_null
-from ray.data.extensions.object_extension import _object_extension_type_allowed

 # Set seed for the test for size as it related to sampling
 np.random.seed(42)
@@ -201,9 +200,6 @@ def test_pandas_block_timestamp_ns(ray_start_regular_shared):
     ), "Timestamp mismatch in PandasBlockBuilder output"


-@pytest.mark.skipif(
-    _object_extension_type_allowed(), reason="Objects can be put into Arrow"
-)
 def test_dict_fallback_to_pandas_block(ray_start_regular_shared):
     # If the UDF returns a column with dict, this throws
     # an error during block construction because we cannot cast dicts
diff --git a/python/ray/data/tests/test_strict_mode.py b/python/ray/data/tests/test_strict_mode.py
index cd7045daccb6..4c4f7d41e755 100644
--- a/python/ray/data/tests/test_strict_mode.py
+++ b/python/ray/data/tests/test_strict_mode.py
@@ -185,7 +185,6 @@ def test_strict_schema(ray_start_regular_shared):
     from ray.data._internal.pandas_block import PandasBlockSchema
     from ray.data.extensions.object_extension import (
         ArrowPythonObjectType,
-        _object_extension_type_allowed,
     )
     from ray.data.extensions.tensor_extension import ArrowTensorType

@@ -203,21 +202,13 @@ def test_strict_schema(ray_start_regular_shared):
     ds = ray.data.from_items([{"x": 2, "y": object(), "z": [1, 2]}])
     schema = ds.schema()

-    if _object_extension_type_allowed():
-        assert isinstance(schema.base_schema, pa.lib.Schema)
-        assert schema.names == ["x", "y", "z"]
-        assert schema.types == [
-            pa.int64(),
-            ArrowPythonObjectType(),
-            pa.list_(pa.int64()),
-        ]
-    else:
-        assert schema.names == ["x", "y", "z"]
-        assert schema.types == [
-            pa.int64(),
-            object,
-            object,
-        ]
+    assert isinstance(schema.base_schema, pa.lib.Schema)
+    assert schema.names == ["x", "y", "z"]
+    assert schema.types == [
+        pa.int64(),
+        ArrowPythonObjectType(),
+        pa.list_(pa.int64()),
+    ]

     ds = ray.data.from_numpy(np.ones((100, 10)))
     schema = ds.schema()
diff --git a/python/ray/data/tests/test_transform_pyarrow.py b/python/ray/data/tests/test_transform_pyarrow.py
index 9c81a9cf141a..7d26de8c4b16 100644
--- a/python/ray/data/tests/test_transform_pyarrow.py
+++ b/python/ray/data/tests/test_transform_pyarrow.py
@@ -28,7 +28,6 @@
     ArrowTensorArray,
     ArrowTensorType,
     ArrowVariableShapedTensorType,
-    _object_extension_type_allowed,
 )


@@ -999,9 +998,6 @@ def map_batches(x):
     return ds


-@pytest.mark.skipif(
-    _object_extension_type_allowed(), reason="Arrow table supports pickled objects"
-)
 @pytest.mark.parametrize(
     "op, data",
     [
diff --git a/python/ray/tests/test_storage.py b/python/ray/tests/test_storage.py
index 565144056fd9..9482abd168f1 100644
--- a/python/ray/tests/test_storage.py
+++ b/python/ray/tests/test_storage.py
@@ -258,32 +258,26 @@ def test_connecting_to_cluster(shutdown_only, storage_type):


 def test_add_creatable_buckets_param_if_s3_uri():
-    if get_pyarrow_version() >= parse_version("9.0.0"):
-        # Test that the allow_bucket_creation=true query arg is added to an S3 URI.
-        uri = "s3://bucket/foo"
-        assert (
-            add_creatable_buckets_param_if_s3_uri(uri)
-            == "s3://bucket/foo?allow_bucket_creation=true"
-        )
-
-        # Test that query args are merged (i.e. existing query args aren't dropped).
-        uri = "s3://bucket/foo?bar=baz"
-        assert (
-            add_creatable_buckets_param_if_s3_uri(uri)
-            == "s3://bucket/foo?allow_bucket_creation=true&bar=baz"
-        )
-
-        # Test that existing allow_bucket_creation=false query arg isn't overridden.
-        uri = "s3://bucket/foo?allow_bucket_creation=false"
-        assert (
-            add_creatable_buckets_param_if_s3_uri(uri)
-            == "s3://bucket/foo?allow_bucket_creation=false"
-        )
-    else:
-        # Test that the allow_bucket_creation=true query arg is not added to an S3 URI,
-        # since we're using Arrow < 9.
-        uri = "s3://bucket/foo"
-        assert add_creatable_buckets_param_if_s3_uri(uri) == uri
+    # Test that the allow_bucket_creation=true query arg is added to an S3 URI.
+    uri = "s3://bucket/foo"
+    assert (
+        add_creatable_buckets_param_if_s3_uri(uri)
+        == "s3://bucket/foo?allow_bucket_creation=true"
+    )
+
+    # Test that query args are merged (i.e. existing query args aren't dropped).
+    uri = "s3://bucket/foo?bar=baz"
+    assert (
+        add_creatable_buckets_param_if_s3_uri(uri)
+        == "s3://bucket/foo?allow_bucket_creation=true&bar=baz"
+    )
+
+    # Test that existing allow_bucket_creation=false query arg isn't overridden.
+    uri = "s3://bucket/foo?allow_bucket_creation=false"
+    assert (
+        add_creatable_buckets_param_if_s3_uri(uri)
+        == "s3://bucket/foo?allow_bucket_creation=false"
+    )

     # Test that non-S3 URI is unchanged.
     uri = "gcs://bucket/foo"
diff --git a/release/ray_release/byod/byod_train_persistence_test.sh b/release/ray_release/byod/byod_train_persistence_test.sh
index c81d4f9f03a6..d72ed36d7b91 100755
--- a/release/ray_release/byod/byod_train_persistence_test.sh
+++ b/release/ray_release/byod/byod_train_persistence_test.sh
@@ -4,4 +4,4 @@

 set -exo pipefail

-pip3 install -U torch fsspec s3fs gcsfs pyarrow>=9.0.0 pytest
+pip3 install -U torch fsspec s3fs gcsfs pyarrow>=17.0.0 pytest