Skip to content

Commit 7327753

Browse files
committed
feat(RHOAIENG-29330):Deny RayCluster creation with Ray Version mismatches fixed
1 parent 5a77f7b commit 7327753

File tree

6 files changed

+556
-3
lines changed

6 files changed

+556
-3
lines changed
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
# Copyright 2022-2025 IBM, Red Hat
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from codeflare_sdk.common.utils.validation import (
16+
extract_ray_version_from_image,
17+
validate_ray_version_compatibility,
18+
)
19+
from codeflare_sdk.common.utils.constants import RAY_VERSION
20+
21+
22+
class TestRayVersionDetection:
    """Checks that Ray versions are parsed out of container image references."""

    def test_extract_ray_version_standard_format(self):
        """A bare ``ray:<version>`` tag yields that version unchanged."""
        for version in ("2.47.1", "2.46.0", "1.13.0"):
            assert extract_ray_version_from_image(f"ray:{version}") == version

    def test_extract_ray_version_with_registry(self):
        """A registry or repository prefix does not affect the extracted version."""
        prefixed_images = (
            "quay.io/ray:2.47.1",
            "docker.io/rayproject/ray:2.47.1",
            "gcr.io/my-project/ray:2.47.1",
        )
        for image in prefixed_images:
            assert extract_ray_version_from_image(image) == "2.47.1"

    def test_extract_ray_version_with_suffixes(self):
        """Anything trailing the MAJOR.MINOR.PATCH part of the tag is dropped."""
        suffixed_images = (
            "quay.io/modh/ray:2.47.1-py311-cu121",
            "ray:2.47.1-py311",
            "ray:2.47.1-gpu",
            "ray:2.47.1-rocm62",
        )
        for image in suffixed_images:
            assert extract_ray_version_from_image(image) == "2.47.1"

    def test_extract_ray_version_complex_registry_paths(self):
        """Multi-segment registry paths still resolve to the tagged version."""
        nested_images = (
            "quay.io/modh/ray:2.47.1-py311-cu121",
            "registry.company.com/team/ray:2.47.1",
        )
        for image in nested_images:
            assert extract_ray_version_from_image(image) == "2.47.1"

    def test_extract_ray_version_no_version_found(self):
        """References without a recognisable Ray version yield ``None``."""
        unversioned = (
            # SHA-based (digest) reference
            "quay.io/modh/ray@sha256:6d076aeb38ab3c34a6a2ef0f58dc667089aa15826fa08a73273c629333e12f1e",
            # Non-semantic tags
            "ray:latest",
            "ray:nightly",
            "ray:v2.47",  # Missing patch version
            # Non-Ray images
            "python:3.11",
            "ubuntu:20.04",
            # Empty or None
            "",
            None,
        )
        for image in unversioned:
            assert extract_ray_version_from_image(image) is None

    def test_extract_ray_version_edge_cases(self):
        """Covers the ``v`` prefix and tags holding several version-like parts."""
        # A 'v' prefix directly after the colon is not accepted by the pattern.
        v_prefixed = extract_ray_version_from_image("ray:v2.47.1")
        assert v_prefixed is None

        # With several version-like substrings, the one right after "ray:" wins.
        first_of_many = extract_ray_version_from_image(
            "registry/ray:2.47.1-based-on-1.0.0"
        )
        assert first_of_many == "2.47.1"
99+
100+
101+
class TestRayVersionValidation:
    """Checks the (is_compatible, is_warning, message) result of version validation."""

    def test_validate_compatible_versions(self):
        """Images tagged with the SDK's own Ray version are accepted silently."""
        matching_images = (
            f"ray:{RAY_VERSION}",  # exact match
            f"quay.io/modh/ray:{RAY_VERSION}-py311-cu121",  # registry + suffixes
        )
        for image in matching_images:
            compatible, warning, text = validate_ray_version_compatibility(image)
            assert compatible is True
            assert warning is False
            assert "Ray versions match" in text

    def test_validate_incompatible_versions(self):
        """Images tagged with a different Ray version are rejected."""
        cases = (
            # Different version
            (
                "ray:2.46.0",
                (
                    "Ray version mismatch detected",
                    "CodeFlare SDK uses Ray",
                    "runtime image uses Ray",
                ),
            ),
            # Older version
            ("ray:1.13.0", ("Ray version mismatch detected",)),
        )
        for image, fragments in cases:
            compatible, warning, text = validate_ray_version_compatibility(image)
            assert compatible is False
            assert warning is False
            for fragment in fragments:
                assert fragment in text

    def test_validate_empty_image(self):
        """An empty string or ``None`` image is reported as the compatible default."""
        for image in ("", None):
            compatible, warning, text = validate_ray_version_compatibility(image)
            assert compatible is True
            assert warning is False
            assert "Using default Ray image compatible with SDK" in text

    def test_validate_unknown_version(self):
        """Images whose version cannot be read are allowed, but flagged as a warning."""
        undetectable_images = (
            # SHA-based image
            "quay.io/modh/ray@sha256:6d076aeb38ab3c34a6a2ef0f58dc667089aa15826fa08a73273c629333e12f1e",
            # Custom image without version
            "my-custom-ray:latest",
        )
        for image in undetectable_images:
            compatible, warning, text = validate_ray_version_compatibility(image)
            assert compatible is True
            assert warning is True
            assert "Cannot determine Ray version" in text

    def test_validate_custom_sdk_version(self):
        """An explicitly supplied SDK version is the one compared against."""
        # Image matches the supplied SDK version.
        compatible, warning, text = validate_ray_version_compatibility(
            "ray:2.46.0", "2.46.0"
        )
        assert compatible is True
        assert warning is False
        assert "Ray versions match" in text

        # Image differs from the supplied SDK version.
        compatible, warning, text = validate_ray_version_compatibility(
            "ray:2.47.1", "2.46.0"
        )
        assert compatible is False
        assert warning is False
        assert "CodeFlare SDK uses Ray 2.46.0" in text
        assert "runtime image uses Ray 2.47.1" in text

    def test_validate_message_content(self):
        """The mismatch message carries the expected guidance phrases."""
        compatible, warning, text = validate_ray_version_compatibility("ray:2.46.0")
        assert compatible is False
        assert warning is False

        lowered = text.lower()
        expected_phrases = (
            "compatibility issues",
            "unexpected behavior",
            "please use a runtime image",
            "update your sdk version",
        )
        for phrase in expected_phrases:
            assert phrase in lowered

    def test_semantic_version_comparison(self):
        """Versions whose numeric and lexicographic orderings disagree are mismatches."""
        # (image version, SDK version): 2.10.0 vs 2.9.1 in both directions. Each
        # pair must be reported as a mismatch that names both versions.
        version_pairs = (
            ("2.10.0", "2.9.1"),
            ("2.9.1", "2.10.0"),
        )
        for image_version, sdk_version in version_pairs:
            compatible, warning, text = validate_ray_version_compatibility(
                f"ray:{image_version}", sdk_version
            )
            assert compatible is False
            assert warning is False
            assert f"CodeFlare SDK uses Ray {sdk_version}" in text
            assert f"runtime image uses Ray {image_version}" in text
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Copyright 2022-2025 IBM, Red Hat
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Validation utilities for the CodeFlare SDK.
17+
18+
This module contains validation functions used across the SDK for ensuring
19+
configuration compatibility and correctness.
20+
"""
21+
22+
import logging
23+
import re
24+
from typing import Optional, Tuple
25+
from packaging.version import Version, InvalidVersion
26+
from .constants import RAY_VERSION
27+
28+
logger = logging.getLogger(__name__)
29+
30+
31+
def extract_ray_version_from_image(image_name: Optional[str]) -> Optional[str]:
    """
    Extract the Ray version from a container image name.

    Supports various image naming patterns:
    - quay.io/modh/ray:2.47.1-py311-cu121
    - ray:2.47.1
    - some-registry/ray:2.47.1-py311
    - quay.io/modh/ray@sha256:... (falls back to None)

    Args:
        image_name: The container image name/tag. ``None`` and the empty
            string are accepted and yield ``None``.

    Returns:
        The extracted ``MAJOR.MINOR.PATCH`` version string, or None if no
        pattern matches.
    """
    if not image_name:
        return None

    # Tried in order; the first pattern that matches anywhere in the name wins.
    # A dedicated "/ray:<version>" pattern is not needed: every string it would
    # match also contains "ray:<version>" and is caught by the first entry.
    patterns = (
        r"ray:(\d+\.\d+\.\d+)",  # ray:2.47.1, any-registry/ray:2.47.1-py311
        r"ray/[^:]*:(\d+\.\d+\.\d+)",  # .../ray/<image>:2.47.1
    )

    for pattern in patterns:
        match = re.search(pattern, image_name)
        if match:
            return match.group(1)

    # No pattern matched: the version is unknown.
    return None
65+
66+
67+
def validate_ray_version_compatibility(
    image_name: str, sdk_ray_version: str = RAY_VERSION
) -> Tuple[bool, bool, str]:
    """
    Check whether the Ray version tagged on a runtime image equals the SDK's.

    Args:
        image_name: The container image name/tag
        sdk_ray_version: The Ray version used by the CodeFlare SDK

    Returns:
        tuple: (is_compatible, is_warning, message)
            - is_compatible: False only when a version is extracted from the
              image and differs from ``sdk_ray_version``; True otherwise
            - is_warning: True only when no version could be extracted from a
              non-empty image name
            - message: Descriptive message about the validation result
    """
    # Guard: no custom image means the default image is used.
    if not image_name:
        default_note = "Using default Ray image compatible with SDK"
        logger.debug(default_note)
        return True, False, default_note

    detected = extract_ray_version_from_image(image_name)

    # Guard: version unknown -> allow, but mark the result as a warning.
    if detected is None:
        unknown_note = f"Cannot determine Ray version from image '{image_name}'. Please ensure it's compatible with Ray {sdk_ray_version}"
        return True, True, unknown_note

    # Prefer a parsed-version comparison; if either string cannot be parsed,
    # compare the raw strings instead.
    try:
        versions_differ = Version(sdk_ray_version) != Version(detected)
    except InvalidVersion as e:
        logger.warning(
            f"Failed to parse version for comparison ({e}), falling back to string comparison"
        )
        versions_differ = detected != sdk_ray_version

    if versions_differ:
        mismatch_note = (
            f"Ray version mismatch detected!\n"
            f"CodeFlare SDK uses Ray {sdk_ray_version}, but runtime image uses Ray {detected}.\n"
            f"This mismatch can cause compatibility issues and unexpected behavior.\n"
            f"Please use a runtime image with Ray {sdk_ray_version} or update your SDK version."
        )
        return False, False, mismatch_note

    logger.debug(
        f"Ray version validation successful: SDK and runtime image both use Ray {sdk_ray_version}"
    )
    match_note = (
        f"Ray versions match: SDK and runtime image both use Ray {sdk_ray_version}"
    )
    return True, False, match_note

src/codeflare_sdk/ray/cluster/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from typing import Dict, List, Optional, Union, get_args, get_origin
2525
from kubernetes.client import V1Toleration, V1Volume, V1VolumeMount
2626

27+
2728
dir = pathlib.Path(__file__).parent.parent.resolve()
2829

2930
# https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html

src/codeflare_sdk/ray/cluster/test_config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2024 IBM, Red Hat
1+
# Copyright 2022-2025 IBM, Red Hat
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -20,11 +20,13 @@
2020
get_template_variables,
2121
)
2222
from codeflare_sdk.ray.cluster.cluster import ClusterConfiguration, Cluster
23+
from codeflare_sdk.common.utils.constants import RAY_VERSION
2324
from pathlib import Path
2425
import filecmp
2526
import pytest
2627
import os
2728
import yaml
29+
import warnings
2830

2931
parent = Path(__file__).resolve().parents[4] # project directory
3032
expected_clusters_dir = f"{parent}/tests/test_cluster_yamls"

0 commit comments

Comments
 (0)