@@ -2004,3 +2004,193 @@ def test_update_existing_cluster_for_scripts_api_errors(mocker, auto_mock_setup)
2004
2004
rayjob ._update_existing_cluster_for_scripts (
2005
2005
"test-configmap" , mock_config_builder
2006
2006
)
2007
+
2008
+
2009
def test_rayjob_kueue_label_no_default_queue(auto_mock_setup, mocker, caplog):
    """Submitting without any default LocalQueue labels the job 'default' and warns."""
    queue_label_key = "kueue.x-k8s.io/queue-name"

    # Make the default-queue lookup report that nothing is configured.
    mocker.patch(
        "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name",
        return_value=None,
    )

    api = auto_mock_setup["rayjob_api"]
    api.submit_job.return_value = {"metadata": {"name": "test-job"}}

    job = RayJob(
        job_name="test-job",
        cluster_config=ManagedClusterConfig(),
        entrypoint="python script.py",
    )

    # Capture WARNING-level records emitted while the job is submitted.
    with caplog.at_level("WARNING"):
        job.submit()

    # The resource handed to the API must carry the fallback queue label.
    job_cr = api.submit_job.call_args.kwargs["job"]
    assert job_cr["metadata"]["labels"][queue_label_key] == "default"

    # The fallback must also be announced in the log output.
    assert "No default Kueue LocalQueue found" in caplog.text
2036
+
2037
+
2038
def test_rayjob_kueue_explicit_local_queue(auto_mock_setup):
    """An explicitly supplied ``local_queue`` is used as the Kueue queue label."""
    api = auto_mock_setup["rayjob_api"]
    api.submit_job.return_value = {"metadata": {"name": "test-job"}}

    job = RayJob(
        job_name="test-job",
        cluster_config=ManagedClusterConfig(),
        entrypoint="python script.py",
        local_queue="custom-queue",
    )
    job.submit()

    # Inspect the resource passed to the API via the ``job`` keyword argument.
    job_cr = api.submit_job.call_args.kwargs["job"]
    labels = job_cr["metadata"]["labels"]
    assert labels["kueue.x-k8s.io/queue-name"] == "custom-queue"
2060
+
2061
+
2062
def test_rayjob_no_kueue_label_for_existing_cluster(auto_mock_setup):
    """Test RayJob doesn't add Kueue label for existing clusters.

    The job targets an existing cluster by ``cluster_name`` (no
    ``cluster_config``), and the resource passed to ``submit_job`` must not
    carry the ``kueue.x-k8s.io/queue-name`` label.
    """
    mock_api_instance = auto_mock_setup["rayjob_api"]
    mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}}

    # Using existing cluster (no cluster_config)
    rayjob = RayJob(
        job_name="test-job",
        cluster_name="existing-cluster",
        entrypoint="python script.py",
    )

    rayjob.submit()

    # Verify no Kueue label was added
    call_args = mock_api_instance.submit_job.call_args
    submitted_job = call_args.kwargs["job"]

    # A job with no labels mapping at all (missing key or None) also satisfies
    # "no Kueue label", so read the labels tolerantly instead of indexing
    # directly and failing with an unrelated KeyError/TypeError.
    labels = submitted_job["metadata"].get("labels") or {}
    assert "kueue.x-k8s.io/queue-name" not in labels
2080
+
2081
+
2082
def test_rayjob_with_ttl_and_deadline(auto_mock_setup):
    """TTL and active-deadline arguments appear in the submitted job spec."""
    api = auto_mock_setup["rayjob_api"]
    api.submit_job.return_value = {"metadata": {"name": "test-job"}}

    job = RayJob(
        job_name="test-job",
        cluster_config=ManagedClusterConfig(),
        entrypoint="python script.py",
        ttl_seconds_after_finished=300,
        active_deadline_seconds=600,
    )
    job.submit()

    # Both values should be present under the camelCase spec keys.
    spec = api.submit_job.call_args.kwargs["job"]["spec"]
    assert spec["ttlSecondsAfterFinished"] == 300
    assert spec["activeDeadlineSeconds"] == 600
2103
+
2104
+
2105
def test_rayjob_shutdown_after_job_finishes(auto_mock_setup):
    """``shutdownAfterJobFinishes`` is True for managed clusters, False for existing ones."""
    api = auto_mock_setup["rayjob_api"]
    api.submit_job.return_value = {"metadata": {"name": "test-job"}}

    # Phase 1: a job created with a cluster_config should request shutdown.
    managed_job = RayJob(
        job_name="test-job",
        cluster_config=ManagedClusterConfig(),
        entrypoint="python script.py",
    )
    managed_job.submit()

    managed_spec = api.submit_job.call_args.kwargs["job"]["spec"]
    assert managed_spec["shutdownAfterJobFinishes"] is True

    # Phase 2: a job pointed at an existing cluster should not request shutdown.
    existing_job = RayJob(
        job_name="test-job2",
        cluster_name="existing-cluster",
        entrypoint="python script.py",
    )
    existing_job.submit()

    # call_args reflects the most recent submit_job invocation.
    existing_spec = api.submit_job.call_args.kwargs["job"]["spec"]
    assert existing_spec["shutdownAfterJobFinishes"] is False
2136
+
2137
+
2138
def test_rayjob_stop_delete_resubmit_logging(auto_mock_setup, caplog):
    """stop(), delete() and resubmit() each return True and log a success message."""
    api = auto_mock_setup["rayjob_api"]

    # --- stop -------------------------------------------------------------
    api.suspend_job.return_value = {
        "metadata": {"name": "test-rayjob"},
        "spec": {"suspend": True},
    }

    job = RayJob(
        job_name="test-rayjob",
        cluster_name="test-cluster",
        namespace="test-namespace",
        entrypoint="python script.py",
    )

    with caplog.at_level("INFO"):
        stopped = job.stop()

    assert stopped is True
    assert "Successfully stopped the RayJob test-rayjob" in caplog.text

    # --- delete -----------------------------------------------------------
    # Drop the records captured so far so each phase checks only its own output.
    caplog.clear()
    api.delete_job.return_value = True

    with caplog.at_level("INFO"):
        deleted = job.delete()

    assert deleted is True
    assert "Successfully deleted the RayJob test-rayjob" in caplog.text

    # --- resubmit ---------------------------------------------------------
    caplog.clear()
    api.resubmit_job.return_value = {
        "metadata": {"name": "test-rayjob"},
        "spec": {"suspend": False},
    }

    with caplog.at_level("INFO"):
        resubmitted = job.resubmit()

    assert resubmitted is True
    assert "Successfully resubmitted the RayJob test-rayjob" in caplog.text
2183
+
2184
+
2185
def test_rayjob_initialization_logging(auto_mock_setup, caplog):
    """Constructing a RayJob with a cluster config logs the expected INFO messages."""
    # Both objects are built inside the capture block so construction-time
    # log records are collected.
    with caplog.at_level("INFO"):
        managed_config = ManagedClusterConfig()
        job = RayJob(
            job_name="test-job",
            cluster_config=managed_config,
            entrypoint="python script.py",
        )

    captured = caplog.text
    assert "Creating new cluster: test-job-cluster" in captured
    # NOTE(review): no namespace is passed here, so "test-namespace" presumably
    # comes from the auto_mock_setup fixture — confirm against the fixture.
    assert "Initialized RayJob: test-job in namespace: test-namespace" in captured
0 commit comments