Skip to content

Commit dd02c5c

Browse files
committed
fix: hypervisor name mismatch and test case issue
1 parent bfec7b2 commit dd02c5c

File tree

6 files changed: 500 additions and 503 deletions.

.vscode/launch.json

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727
"mode": "auto",
2828
"console": "integratedTerminal",
2929
"env": {
30-
"KUBECONFIG": "~/.kube/config",
30+
"KUBECONFIG": "~/.kube/config-local-studio",
3131
"HYPERVISOR_PORT": "8042",
3232
"GPU_NODE_NAME": "ubuntu",
3333
},
@@ -65,7 +65,8 @@
6565
"ENABLE_WEBHOOKS": "false",
6666
"ENABLE_SCHEDULER": "true",
6767
"ENABLE_CR_CONTROLLER": "true",
68-
"NVIDIA_OPERATOR_PROGRESSIVE_MIGRATION": "true"
68+
"NVIDIA_OPERATOR_PROGRESSIVE_MIGRATION": "true",
69+
"IMPERSONATE_SERVICE_ACCOUNT": "system:serviceaccount:tensor-fusion-sys:tensor-fusion-sys"
6970
},
7071
"args": [
7172
"--metrics-path", "${workspaceFolder}/logs/metrics.log",

internal/component/hypervisor.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ func (h *Hypervisor) GetResourcesInfo(r client.Client, ctx context.Context, pool
8888
}
8989
key := client.ObjectKey{
9090
Namespace: utils.CurrentNamespace(),
91-
Name: fmt.Sprintf("hypervisor-%s", node.Name),
91+
Name: fmt.Sprintf("tf-hypervisor-%s", node.Name),
9292
}
9393
pod := &corev1.Pod{}
9494
err := r.Get(ctx, key, pod)

internal/controller/gpunode_controller_test.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ import (
2929

3030
var _ = Describe("GPUNode Controller", func() {
3131
Context("When reconciling gpunodes", func() {
32-
It("should create the node discovery job and the hypervisor pod", func() {
32+
It("should create the hypervisor pod", func() {
3333
tfEnv := NewTensorFusionEnvBuilder().
3434
AddPoolWithNodeCount(1).
3535
SetGpuCountPerNode(1).
@@ -40,7 +40,7 @@ var _ = Describe("GPUNode Controller", func() {
4040
pod := &corev1.Pod{}
4141
Eventually(func(g Gomega) {
4242
err := k8sClient.Get(ctx, types.NamespacedName{
43-
Name: fmt.Sprintf("hypervisor-%s", gpuNode.Name),
43+
Name: fmt.Sprintf("tf-hypervisor-%s", gpuNode.Name),
4444
Namespace: utils.CurrentNamespace(),
4545
}, pod)
4646
g.Expect(err).ShouldNot(HaveOccurred())
@@ -59,7 +59,7 @@ var _ = Describe("GPUNode Controller", func() {
5959
Eventually(func(g Gomega) {
6060
newPod := &corev1.Pod{}
6161
err := k8sClient.Get(ctx, types.NamespacedName{
62-
Name: fmt.Sprintf("hypervisor-%s", gpuNode.Name),
62+
Name: fmt.Sprintf("tf-hypervisor-%s", gpuNode.Name),
6363
Namespace: utils.CurrentNamespace(),
6464
}, newPod)
6565
g.Expect(err).ShouldNot(HaveOccurred())

internal/controller/gpupool_controller_test.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -429,7 +429,7 @@ func verifyHypervisorPodHash(gpuNode *tfv1.GPUNode, hash string) {
429429
Eventually(func(g Gomega) {
430430
pod := &corev1.Pod{}
431431
g.Expect(k8sClient.Get(ctx, client.ObjectKey{
432-
Name: fmt.Sprintf("hypervisor-%s", gpuNode.Name),
432+
Name: fmt.Sprintf("tf-hypervisor-%s", gpuNode.Name),
433433
Namespace: utils.CurrentNamespace(),
434434
}, pod)).Should(Succeed())
435435
g.Expect(pod.Labels[constants.LabelKeyPodTemplateHash]).Should(Equal(hash))
@@ -463,7 +463,7 @@ func verifyHypervisorPodHashConsistently(gpuNode *tfv1.GPUNode, hash string) {
463463
Consistently(func(g Gomega) {
464464
pod := &corev1.Pod{}
465465
g.Expect(k8sClient.Get(ctx, client.ObjectKey{
466-
Name: fmt.Sprintf("hypervisor-%s", gpuNode.Name),
466+
Name: fmt.Sprintf("tf-hypervisor-%s", gpuNode.Name),
467467
Namespace: utils.CurrentNamespace(),
468468
}, pod)).Should(Succeed())
469469
g.Expect(pod.Labels[constants.LabelKeyPodTemplateHash]).Should(Equal(hash))
@@ -486,7 +486,7 @@ func verifyAllHypervisorPodHash(tfEnv *TensorFusionEnv, hash string) {
486486
for _, gpuNode := range nodeList.Items {
487487
pod := &corev1.Pod{}
488488
g.Expect(k8sClient.Get(ctx, client.ObjectKey{
489-
Name: fmt.Sprintf("hypervisor-%s", gpuNode.Name),
489+
Name: fmt.Sprintf("tf-hypervisor-%s", gpuNode.Name),
490490
Namespace: utils.CurrentNamespace(),
491491
}, pod)).Should(Succeed())
492492
g.Expect(pod.Labels[constants.LabelKeyPodTemplateHash]).Should(Equal(hash))
@@ -552,7 +552,7 @@ func verifyAllHypervisorPodHashConsistently(tfEnv *TensorFusionEnv, hash string)
552552
for _, gpuNode := range nodeList.Items {
553553
pod := &corev1.Pod{}
554554
g.Expect(k8sClient.Get(ctx, client.ObjectKey{
555-
Name: fmt.Sprintf("hypervisor-%s", gpuNode.Name),
555+
Name: fmt.Sprintf("tf-hypervisor-%s", gpuNode.Name),
556556
Namespace: utils.CurrentNamespace(),
557557
}, pod)).Should(Succeed())
558558
g.Expect(pod.Labels[constants.LabelKeyPodTemplateHash]).Should(Equal(hash))

0 commit comments

Comments
 (0)