Skip to content

Commit fc3e1f2

Browse files
mTLS POC
Changes include: a feature flag to switch mTLS on (it is off by default); a new mTLS reconciler that reconciles the cert-manager resources when mTLS is on; the required environment variables, volumes, and volume mounts added to each pod in the cluster behind the feature flag; and the additional RBAC rules required. Co-authored-by: laurafitzgerald <[email protected]>
1 parent ef7cf5e commit fc3e1f2

File tree

7 files changed

+607
-44
lines changed

7 files changed

+607
-44
lines changed

ray-operator/config/rbac/role.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,24 @@ rules:
174174
- patch
175175
- update
176176
- watch
177+
- apiGroups:
178+
- cert-manager.io
179+
resources:
180+
- issuers
181+
- certificates
182+
verbs:
183+
- create
184+
- delete
185+
- get
186+
- list
187+
- patch
188+
- update
189+
- watch
190+
- apiGroups:
191+
- cert-manager.io
192+
resources:
193+
- certificates/status
194+
verbs:
195+
- get
196+
- patch
197+
- update

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,13 @@ func (r *RayClusterReconciler) buildHeadPod(ctx context.Context, instance rayv1.
978978
if len(r.options.HeadSidecarContainers) > 0 {
979979
podConf.Spec.Containers = append(podConf.Spec.Containers, r.options.HeadSidecarContainers...)
980980
}
981+
982+
// Configure mTLS if enabled
983+
if features.Enabled(features.MTLS) {
984+
logger.Info("mTLS is enabled, configuring mTLS for head pod")
985+
r.configureMTLSForPod(&podConf, instance)
986+
}
987+
981988
logger.Info("head pod labels", "labels", podConf.Labels)
982989
creatorCRDType := getCreatorCRDType(instance)
983990
pod := common.BuildPod(ctx, podConf, rayv1.HeadNode, instance.Spec.HeadGroupSpec.RayStartParams, headPort, autoscalingEnabled, creatorCRDType, fqdnRayIP)
@@ -1006,6 +1013,13 @@ func (r *RayClusterReconciler) buildWorkerPod(ctx context.Context, instance rayv
10061013
if len(r.options.WorkerSidecarContainers) > 0 {
10071014
podTemplateSpec.Spec.Containers = append(podTemplateSpec.Spec.Containers, r.options.WorkerSidecarContainers...)
10081015
}
1016+
1017+
// Configure mTLS if enabled
1018+
if features.Enabled(features.MTLS) {
1019+
logger.Info("mTLS is enabled, configuring mTLS for head pod")
1020+
r.configureMTLSForPod(&podTemplateSpec, instance)
1021+
}
1022+
10091023
creatorCRDType := getCreatorCRDType(instance)
10101024
pod := common.BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, headPort, autoscalingEnabled, creatorCRDType, fqdnRayIP)
10111025
// Set raycluster instance as the owner and controller
@@ -1016,6 +1030,78 @@ func (r *RayClusterReconciler) buildWorkerPod(ctx context.Context, instance rayv
10161030
return pod
10171031
}
10181032

1033+
// configureMTLSForPod configures mTLS settings for a pod template if mTLS is enabled
1034+
func (r *RayClusterReconciler) configureMTLSForPod(podTemplate *corev1.PodTemplateSpec, instance rayv1.RayCluster) {
1035+
1036+
// Add TLS environment variables
1037+
r.addTLSEnvironmentVariables(&podTemplate.Spec.Containers[0])
1038+
1039+
// Determine the appropriate secret name based on node type
1040+
// We can determine this from the pod labels or container name
1041+
var secretName string
1042+
if strings.Contains(podTemplate.Spec.Containers[0].Name, "ray-head") ||
1043+
(podTemplate.Labels != nil && podTemplate.Labels[utils.RayNodeTypeLabelKey] == string(rayv1.HeadNode)) {
1044+
secretName = fmt.Sprintf("ray-head-secret-%s", instance.Name)
1045+
} else {
1046+
secretName = fmt.Sprintf("ray-worker-secret-%s", instance.Name)
1047+
}
1048+
1049+
// Add CA volumes with proper secret references
1050+
r.addCAVolumes(&podTemplate.Spec, secretName)
1051+
1052+
// Add certificate volume mounts
1053+
r.addCertVolumeMounts(&podTemplate.Spec.Containers[0])
1054+
}
1055+
1056+
// addTLSEnvironmentVariables adds Ray TLS environment variables to a container
1057+
func (r *RayClusterReconciler) addTLSEnvironmentVariables(container *corev1.Container) {
1058+
tlsEnvVars := []corev1.EnvVar{
1059+
{
1060+
Name: "MY_POD_IP",
1061+
ValueFrom: &corev1.EnvVarSource{
1062+
FieldRef: &corev1.ObjectFieldSelector{
1063+
FieldPath: "status.podIP",
1064+
},
1065+
},
1066+
},
1067+
{Name: "RAY_USE_TLS", Value: "1"},
1068+
{Name: "RAY_TLS_SERVER_CERT", Value: "/home/ray/workspace/tls/server.crt"},
1069+
{Name: "RAY_TLS_SERVER_KEY", Value: "/home/ray/workspace/tls/server.key"},
1070+
{Name: "RAY_TLS_CA_CERT", Value: "/home/ray/workspace/tls/ca.crt"},
1071+
}
1072+
1073+
container.Env = append(container.Env, tlsEnvVars...)
1074+
}
1075+
1076+
// addCAVolumes adds CA and certificate volumes to a pod spec
1077+
func (r *RayClusterReconciler) addCAVolumes(podSpec *corev1.PodSpec, secretName string) {
1078+
caVolumes := []corev1.Volume{
1079+
{
1080+
Name: "ca-vol",
1081+
VolumeSource: corev1.VolumeSource{
1082+
Secret: &corev1.SecretVolumeSource{
1083+
SecretName: secretName,
1084+
},
1085+
},
1086+
},
1087+
}
1088+
1089+
podSpec.Volumes = append(podSpec.Volumes, caVolumes...)
1090+
}
1091+
1092+
// addCertVolumeMounts adds certificate volume mounts to a container
1093+
func (r *RayClusterReconciler) addCertVolumeMounts(container *corev1.Container) {
1094+
volumeMounts := []corev1.VolumeMount{
1095+
{
1096+
Name: "ca-vol",
1097+
MountPath: "/home/ray/workspace/tls",
1098+
ReadOnly: true,
1099+
},
1100+
}
1101+
1102+
container.VolumeMounts = append(container.VolumeMounts, volumeMounts...)
1103+
}
1104+
10191105
func (r *RayClusterReconciler) buildRedisCleanupJob(ctx context.Context, instance rayv1.RayCluster) batchv1.Job {
10201106
logger := ctrl.LoggerFrom(ctx)
10211107

0 commit comments

Comments
 (0)