Skip to content

Commit 41462a9

Browse files
committed
[Test][RayCluster] Use a new RayClusterReplicaFailure condition to reflect the result of reconcilePods
Signed-off-by: Rueian <[email protected]>
1 parent 8482a25 commit 41462a9

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

ray-operator/controllers/ray/raycluster_controller_unit_test.go

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/ray-project/kuberay/ray-operator/controllers/ray/common"
2626
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
2727
"github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme"
28+
"github.com/ray-project/kuberay/ray-operator/pkg/features"
2829

2930
. "github.com/onsi/ginkgo/v2"
3031
"github.com/stretchr/testify/assert"
@@ -33,6 +34,7 @@ import (
3334
corev1 "k8s.io/api/core/v1"
3435
rbacv1 "k8s.io/api/rbac/v1"
3536
k8serrors "k8s.io/apimachinery/pkg/api/errors"
37+
"k8s.io/apimachinery/pkg/api/meta"
3638
"k8s.io/apimachinery/pkg/api/resource"
3739
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3840
"k8s.io/apimachinery/pkg/labels"
@@ -946,7 +948,7 @@ func TestReconcile_PodEvicted_DiffLess0_OK(t *testing.T) {
946948
// The head Pod with the status `Failed` will be deleted, and the function will return an
947949
// error to requeue the request with a short delay. If the function returns nil, the controller
948950
// will requeue the request after RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV (default: 300) seconds.
949-
assert.NotNil(t, err)
951+
assert.ErrorIs(t, err, reconcilePodsErr)
950952

951953
// Filter head pod
952954
err = fakeClient.List(ctx, &podList, &client.ListOptions{
@@ -1637,6 +1639,11 @@ func TestInconsistentRayClusterStatus(t *testing.T) {
16371639
newStatus = oldStatus.DeepCopy()
16381640
newStatus.ObservedGeneration = oldStatus.ObservedGeneration + 1
16391641
assert.False(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
1642+
1643+
// Case 12: `Conditions` is different => return true
1644+
newStatus = oldStatus.DeepCopy()
1645+
meta.SetStatusCondition(&newStatus.Conditions, metav1.Condition{Type: string(rayv1.RayClusterReplicaFailure), Status: metav1.ConditionTrue})
1646+
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
16401647
}
16411648

16421649
func TestCalculateStatus(t *testing.T) {
@@ -1687,6 +1694,17 @@ func TestCalculateStatus(t *testing.T) {
16871694
assert.Equal(t, headService.Name, newInstance.Status.Head.ServiceName)
16881695
assert.NotNil(t, newInstance.Status.StateTransitionTimes, "Cluster state transition timestamp should be created")
16891696
assert.Equal(t, newInstance.Status.LastUpdateTime, newInstance.Status.StateTransitionTimes[rayv1.Ready])
1697+
1698+
// Test reconcilePodsErr with the feature gate disabled
1699+
newInstance, err = r.calculateStatus(ctx, testRayCluster, reconcilePodsErr)
1700+
assert.Nil(t, err)
1701+
assert.Empty(t, newInstance.Status.Conditions)
1702+
1703+
// Test reconcilePodsErr with the feature gate enabled
1704+
defer features.SetFeatureGateDuringTest(t, features.RayClusterStatusConditions, true)()
1705+
newInstance, err = r.calculateStatus(ctx, testRayCluster, reconcilePodsErr)
1706+
assert.Nil(t, err)
1707+
assert.True(t, meta.IsStatusConditionPresentAndEqual(newInstance.Status.Conditions, string(rayv1.RayClusterReplicaFailure), metav1.ConditionTrue))
16901708
}
16911709

16921710
func TestStateTransitionTimes_NoStateChange(t *testing.T) {
@@ -1808,7 +1826,7 @@ func Test_TerminatedWorkers_NoAutoscaler(t *testing.T) {
18081826
// Pods to be deleted, the controller won't create new worker Pods during the same reconcile loop. As a result, the number of worker
18091827
// Pods will be (expectedNumWorkerPods - 1) after the reconcile loop.
18101828
err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster)
1811-
assert.NotNil(t, err)
1829+
assert.ErrorIs(t, err, reconcilePodsErr)
18121830
err = fakeClient.List(ctx, &podList, &client.ListOptions{
18131831
LabelSelector: workerSelector,
18141832
Namespace: namespaceStr,
@@ -1848,7 +1866,7 @@ func Test_TerminatedWorkers_NoAutoscaler(t *testing.T) {
18481866
// Pods to be deleted, the controller won't create new worker Pods during the same reconcile loop. As a result, the number of worker
18491867
// Pods will be (expectedNumWorkerPods - 1) after the reconcile loop.
18501868
err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster)
1851-
assert.NotNil(t, err)
1869+
assert.ErrorIs(t, err, reconcilePodsErr)
18521870
err = fakeClient.List(ctx, &podList, &client.ListOptions{
18531871
LabelSelector: workerSelector,
18541872
Namespace: namespaceStr,
@@ -1927,7 +1945,7 @@ func Test_TerminatedHead_RestartPolicy(t *testing.T) {
19271945
// The head Pod will be deleted and the controller will return an error
19281946
// instead of creating a new head Pod in the same reconcile loop.
19291947
err = testRayClusterReconciler.reconcilePods(ctx, cluster)
1930-
assert.NotNil(t, err)
1948+
assert.ErrorIs(t, err, reconcilePodsErr)
19311949
err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr))
19321950
assert.Nil(t, err, "Fail to get pod list")
19331951
assert.Equal(t, 0, len(podList.Items))
@@ -1995,7 +2013,7 @@ func Test_RunningPods_RayContainerTerminated(t *testing.T) {
19952013
// The head Pod will be deleted and the controller will return an error
19962014
// instead of creating a new head Pod in the same reconcile loop.
19972015
err = testRayClusterReconciler.reconcilePods(ctx, cluster)
1998-
assert.NotNil(t, err)
2016+
assert.ErrorIs(t, err, reconcilePodsErr)
19992017
err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr))
20002018
assert.Nil(t, err, "Fail to get pod list")
20012019
assert.Equal(t, 0, len(podList.Items))

0 commit comments

Comments
 (0)