-
Notifications
You must be signed in to change notification settings - Fork 622
Non-Istio mTLS POC #3952
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
kryanbeane
wants to merge
2
commits into
ray-project:master
Choose a base branch
from
kryanbeane:mtls-poc
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+980
−59
Draft
Non-Istio mTLS POC #3952
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -978,6 +978,13 @@ func (r *RayClusterReconciler) buildHeadPod(ctx context.Context, instance rayv1. | |||||
if len(r.options.HeadSidecarContainers) > 0 { | ||||||
podConf.Spec.Containers = append(podConf.Spec.Containers, r.options.HeadSidecarContainers...) | ||||||
} | ||||||
|
||||||
// Configure mTLS if enabled | ||||||
if features.Enabled(features.MTLS) { | ||||||
logger.Info("mTLS is enabled, configuring mTLS for head pod") | ||||||
r.configureMTLSForPod(&podConf, instance) | ||||||
} | ||||||
|
||||||
logger.Info("head pod labels", "labels", podConf.Labels) | ||||||
creatorCRDType := getCreatorCRDType(instance) | ||||||
pod := common.BuildPod(ctx, podConf, rayv1.HeadNode, instance.Spec.HeadGroupSpec.RayStartParams, headPort, autoscalingEnabled, creatorCRDType, fqdnRayIP) | ||||||
|
@@ -1006,6 +1013,13 @@ func (r *RayClusterReconciler) buildWorkerPod(ctx context.Context, instance rayv | |||||
if len(r.options.WorkerSidecarContainers) > 0 { | ||||||
podTemplateSpec.Spec.Containers = append(podTemplateSpec.Spec.Containers, r.options.WorkerSidecarContainers...) | ||||||
} | ||||||
|
||||||
// Configure mTLS if enabled | ||||||
if features.Enabled(features.MTLS) { | ||||||
logger.Info("mTLS is enabled, configuring mTLS for head pod") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
r.configureMTLSForPod(&podTemplateSpec, instance) | ||||||
} | ||||||
|
||||||
creatorCRDType := getCreatorCRDType(instance) | ||||||
pod := common.BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, headPort, autoscalingEnabled, creatorCRDType, fqdnRayIP) | ||||||
// Set raycluster instance as the owner and controller | ||||||
|
@@ -1016,6 +1030,102 @@ func (r *RayClusterReconciler) buildWorkerPod(ctx context.Context, instance rayv | |||||
return pod | ||||||
} | ||||||
|
||||||
// configureMTLSForPod configures mTLS settings for a pod template if mTLS is enabled | ||||||
func (r *RayClusterReconciler) configureMTLSForPod(podTemplate *corev1.PodTemplateSpec, instance rayv1.RayCluster) { | ||||||
// Determine the appropriate secret name based on node type | ||||||
// We can determine this from the pod labels or container name | ||||||
var secretName string | ||||||
var isWorker bool | ||||||
if podTemplate.Labels != nil && podTemplate.Labels[utils.RayNodeTypeLabelKey] == string(rayv1.HeadNode) { | ||||||
isWorker = false | ||||||
secretName = fmt.Sprintf("ray-head-secret-%s", instance.Name) | ||||||
} else { | ||||||
isWorker = true | ||||||
secretName = fmt.Sprintf("ray-worker-secret-%s", instance.Name) | ||||||
} | ||||||
|
||||||
for i := range podTemplate.Spec.Containers { | ||||||
// Add TLS environment variables | ||||||
r.addTLSEnvironmentVariables(&podTemplate.Spec.Containers[i], isWorker) | ||||||
// Add certificate volume mounts | ||||||
r.addCertVolumeMounts(&podTemplate.Spec.Containers[i]) | ||||||
} | ||||||
|
||||||
// Add mTLS configuration to init containers as well | ||||||
for i := range podTemplate.Spec.InitContainers { | ||||||
// Add TLS environment variables | ||||||
r.addTLSEnvironmentVariables(&podTemplate.Spec.InitContainers[i], isWorker) | ||||||
// Add certificate volume mounts | ||||||
r.addCertVolumeMounts(&podTemplate.Spec.InitContainers[i]) | ||||||
} | ||||||
|
||||||
// Add CA volumes with proper secret references | ||||||
r.addCAVolumes(&podTemplate.Spec, secretName) | ||||||
} | ||||||
|
||||||
// addTLSEnvironmentVariables adds Ray TLS environment variables to a container | ||||||
func (r *RayClusterReconciler) addTLSEnvironmentVariables(container *corev1.Container, isWorker bool) { | ||||||
// Check if this is a worker container by looking at the container name | ||||||
|
||||||
if isWorker { | ||||||
// Worker pods only need basic TLS environment variables | ||||||
tlsEnvVars := []corev1.EnvVar{ | ||||||
{Name: "RAY_USE_TLS", Value: "1"}, | ||||||
{Name: "RAY_TLS_SERVER_CERT", Value: "/home/ray/workspace/tls/tls.crt"}, | ||||||
{Name: "RAY_TLS_SERVER_KEY", Value: "/home/ray/workspace/tls/tls.key"}, | ||||||
{Name: "RAY_TLS_CA_CERT", Value: "/home/ray/workspace/tls/ca.crt"}, | ||||||
} | ||||||
container.Env = append(container.Env, tlsEnvVars...) | ||||||
return | ||||||
} | ||||||
|
||||||
// Head pods need all TLS environment variables | ||||||
tlsEnvVars := []corev1.EnvVar{ | ||||||
{ | ||||||
Name: "MY_POD_IP", | ||||||
ValueFrom: &corev1.EnvVarSource{ | ||||||
FieldRef: &corev1.ObjectFieldSelector{ | ||||||
FieldPath: "status.podIP", | ||||||
}, | ||||||
}, | ||||||
}, | ||||||
{Name: "RAY_USE_TLS", Value: "1"}, | ||||||
{Name: "RAY_TLS_SERVER_CERT", Value: "/home/ray/workspace/tls/tls.crt"}, | ||||||
{Name: "RAY_TLS_SERVER_KEY", Value: "/home/ray/workspace/tls/tls.key"}, | ||||||
{Name: "RAY_TLS_CA_CERT", Value: "/home/ray/workspace/tls/ca.crt"}, | ||||||
} | ||||||
container.Env = append(container.Env, tlsEnvVars...) | ||||||
} | ||||||
|
||||||
// addCAVolumes adds CA and certificate volumes to a pod spec | ||||||
func (r *RayClusterReconciler) addCAVolumes(podSpec *corev1.PodSpec, secretName string) { | ||||||
caVolumes := []corev1.Volume{ | ||||||
{ | ||||||
Name: "ca-vol", | ||||||
VolumeSource: corev1.VolumeSource{ | ||||||
Secret: &corev1.SecretVolumeSource{ | ||||||
SecretName: secretName, | ||||||
}, | ||||||
}, | ||||||
}, | ||||||
} | ||||||
|
||||||
podSpec.Volumes = append(podSpec.Volumes, caVolumes...) | ||||||
} | ||||||
|
||||||
// addCertVolumeMounts adds certificate volume mounts to a container | ||||||
func (r *RayClusterReconciler) addCertVolumeMounts(container *corev1.Container) { | ||||||
volumeMounts := []corev1.VolumeMount{ | ||||||
{ | ||||||
Name: "ca-vol", | ||||||
MountPath: "/home/ray/workspace/tls", | ||||||
ReadOnly: true, | ||||||
}, | ||||||
} | ||||||
|
||||||
container.VolumeMounts = append(container.VolumeMounts, volumeMounts...) | ||||||
} | ||||||
|
||||||
func (r *RayClusterReconciler) buildRedisCleanupJob(ctx context.Context, instance rayv1.RayCluster) batchv1.Job { | ||||||
logger := ctrl.LoggerFrom(ctx) | ||||||
|
||||||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The feature gate shouldn't toggle whether the RayCluster should enable mTLS. The feature gate is for allowing / disallowing use of the feature. There should probably be a separate API (field or annotation) to enable mTLS if the feature gate is enabled. Default behavior is still to disable mTLS
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, so Kuberay feature gates aren't for actually enabling a specific feature? Just enabling the use of a feature? From chatting to others, we want to avoid API changes to any of the CRDs so that won't be an option so maybe an annotation is the right way to go
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@andrewsykim would it make more sense to include it here? https://github.com/ray-project/kuberay/blob/master/ray-operator/apis/config/v1alpha1/configuration_types.go as an optional mechanism for all rayclusters under the reconciliation of a kuberay installation given the similar suggestion in #4098
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think Andrew is suggesting that there should be a choice for a RayCluster to opt into mTLS or not. Is that okay for you, @laurafitzgerald? Or do you want to enforce mTLS on all RayClusters when the feature is enabled?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We see this an environment specific configuration. aka an admin wants to enforce this for all RayClusters in the environment they are administrating. Is there a recommended way or existing architecture where this pattern is used inside kuberay? the configuration types for example?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There needs to be a field in RayCluster to enable mTLS. If a platform admin wants to force mTLS on all RayClusters, they can use a mutating admission policy or webhook. The feature gate also needs to exist, we use the feature gate to introduce new features as "alpha" status.