@@ -745,6 +745,47 @@ func AddTFNodeDiscoveryConfAfterTemplate(ctx context.Context, tmpl *v1.PodTempla
745745 tmpl .Spec .Containers [0 ].Image = pool .Spec .ComponentConfig .NodeDiscovery .Image
746746 }
747747
748+ // Add initContainer to wait for NVIDIA GPU Operator toolkit-ready validation
749+ if IsCompatibleWithNvidiaOperator () {
750+ initContainerImage := pool .Spec .ComponentConfig .NodeDiscovery .Image
751+ if initContainerImage == "" {
752+ // Use the same image as the main container if not specified
753+ initContainerImage = tmpl .Spec .Containers [0 ].Image
754+ }
755+
756+ initContainer := v1.Container {
757+ Name : "toolkit-validation" ,
758+ Image : initContainerImage ,
759+ Command : []string {"sh" , "-c" },
760+ Args : []string {
761+ "until [ -f /run/nvidia/validations/toolkit-ready ]; do echo waiting for nvidia container stack to be setup; sleep 5; done" ,
762+ },
763+ SecurityContext : & v1.SecurityContext {
764+ Privileged : ptr .To (true ),
765+ },
766+ VolumeMounts : []v1.VolumeMount {
767+ {
768+ Name : "run-nvidia-validations" ,
769+ MountPath : "/run/nvidia/validations" ,
770+ MountPropagation : ptr .To (v1 .MountPropagationHostToContainer ),
771+ },
772+ },
773+ }
774+
775+ tmpl .Spec .InitContainers = append (tmpl .Spec .InitContainers , initContainer )
776+
777+ // Add volume for NVIDIA validations
778+ tmpl .Spec .Volumes = append (tmpl .Spec .Volumes , v1.Volume {
779+ Name : "run-nvidia-validations" ,
780+ VolumeSource : v1.VolumeSource {
781+ HostPath : & v1.HostPathVolumeSource {
782+ Path : "/run/nvidia/validations" ,
783+ Type : ptr .To (v1 .HostPathDirectoryOrCreate ),
784+ },
785+ },
786+ })
787+ }
788+
748789 tmpl .Spec .Containers [0 ].Env = append (tmpl .Spec .Containers [0 ].Env , v1.EnvVar {
749790 Name : constants .NodeDiscoveryReportGPUNodeEnvName ,
750791 Value : gpuNodeName ,
0 commit comments