diff --git a/config/manager/controller_manager_config.yaml b/config/manager/controller_manager_config.yaml
index e7efa5405..b6ffb68d0 100644
--- a/config/manager/controller_manager_config.yaml
+++ b/config/manager/controller_manager_config.yaml
@@ -4,3 +4,6 @@ leaderElection:
   leaderElect: true
 internalCertManagement:
   enable: true
+# To enable gang scheduling, uncomment the following section (volcano is used as an example).
+# gangSchedulingManagement:
+#   schedulerProvider: volcano
diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml
index 731e257c8..4bcb544b9 100644
--- a/config/rbac/kustomization.yaml
+++ b/config/rbac/kustomization.yaml
@@ -14,4 +14,6 @@ resources:
 - auth_proxy_role.yaml
 - auth_proxy_role_binding.yaml
 - auth_proxy_client_clusterrole.yaml
-- auth_proxy_client_binding.yaml
\ No newline at end of file
+- auth_proxy_client_binding.yaml
+# To enable gang scheduling with volcano, uncomment the following line.
+# - volcano_rbac.yaml
\ No newline at end of file
diff --git a/config/rbac/volcano_rbac.yaml b/config/rbac/volcano_rbac.yaml
new file mode 100644
index 000000000..c8c906af1
--- /dev/null
+++ b/config/rbac/volcano_rbac.yaml
@@ -0,0 +1,30 @@
+# This file provides the RBAC permissions required for volcano gang scheduling.
+# To enable it, uncomment this file in kustomization.yaml and set
+# gangSchedulingManagement.schedulerProvider to "volcano" in the controller configuration.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: lws-volcano-role
+rules:
+- apiGroups:
+  - scheduling.volcano.sh
+  resources:
+  - podgroups
+  verbs:
+  - create
+  - get
+  - list
+  - watch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: lws-volcano-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: lws-volcano-role
+subjects:
+- kind: ServiceAccount
+  name: lws-controller-manager
+  namespace: system
\ No newline at end of file
diff --git a/docs/examples/sample/gang-scheduling/README.md b/docs/examples/sample/gang-scheduling/README.md
index e1a967e2a..cad088cfe 100644
--- a/docs/examples/sample/gang-scheduling/README.md
+++ b/docs/examples/sample/gang-scheduling/README.md
@@ -32,7 +32,22 @@ To enable gang scheduling, you must enable the feature flag and specify `volcano
 Refer to the [install-by-kubectl](https://lws.sigs.k8s.io/docs/installation/#install-by-kubectl).
 
 Gang scheduling is **disabled by default**. To enable gang scheduling capabilities, you need to:
-  1. **Update the configuration ConfigMap** to enable gang scheduling settings:
+  1. **Update RBAC permissions for the lws-controller-manager**
+     Run `kubectl edit clusterrole lws-manager-role` and add the following rule to the `rules` list. This grants the LWS controller manager the permissions it needs to manage Volcano PodGroups.
+     ```yaml
+     # Add this block to the 'rules' section
+     - apiGroups:
+       - scheduling.volcano.sh
+       resources:
+       - podgroups
+       verbs:
+       - create
+       - get
+       - list
+       - watch
+     ```
+
+  2. **Update the configuration ConfigMap** to enable gang scheduling settings:
     ```yaml
     apiVersion: v1
     kind: ConfigMap
@@ -52,7 +67,7 @@ To enable gang scheduling, you must enable the feature flag and specify `volcano
       schedulerProvider: volcano
     ```
 
-  2. **Restart the lws-controller-manager** to apply the new configuration:
+  3. **Restart the lws-controller-manager** to apply the new configuration:
     ```sh
     kubectl rollout restart deployment/lws-controller-manager -n lws-system
     ```
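For context on what these manifests make possible: once gang scheduling is enabled, the controller uses the `podgroups` permissions granted above to create a Volcano PodGroup for each replica group, so that a leader and its workers are scheduled all-or-nothing. The sketch below shows what such a PodGroup might look like; the name, namespace, and values are illustrative assumptions rather than actual controller output, but `minMember` and `minResources` are standard fields of the `scheduling.volcano.sh/v1beta1` PodGroup spec.

```yaml
# Hypothetical PodGroup for a replica group of size 3 (illustrative only).
apiVersion: scheduling.volcano.sh/v1beta1
kind: PodGroup
metadata:
  name: my-lws-0        # assumed naming: one PodGroup per replica group
  namespace: default
spec:
  minMember: 3          # the leader and both workers must be placed together
  minResources:         # aggregated requests, as exercised by the tests below
    cpu: "5"            # 1 (leader) + 2 * 2 (workers)
    memory: 5Gi
```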
diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go
index 3417a8ada..f78602e7e 100644
--- a/pkg/utils/utils_test.go
+++ b/pkg/utils/utils_test.go
@@ -20,6 +20,11 @@ import (
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/utils/ptr"
+
+	leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
 )
 
 func Test_SortByIndex(t *testing.T) {
@@ -63,3 +68,133 @@
 		})
 	}
 }
+
+func TestCalculatePGMinResources(t *testing.T) {
+	testCases := []struct {
+		name string
+		lws  *leaderworkerset.LeaderWorkerSet
+		want corev1.ResourceList
+	}{
+		{
+			name: "leader and worker with different resources",
+			lws: &leaderworkerset.LeaderWorkerSet{
+				Spec: leaderworkerset.LeaderWorkerSetSpec{
+					LeaderWorkerTemplate: leaderworkerset.LeaderWorkerTemplate{
+						Size: ptr.To[int32](3),
+						LeaderTemplate: &corev1.PodTemplateSpec{
+							Spec: corev1.PodSpec{
+								Containers: []corev1.Container{
+									{
+										Resources: corev1.ResourceRequirements{
+											Requests: corev1.ResourceList{
+												corev1.ResourceCPU:    resource.MustParse("1"),
+												corev1.ResourceMemory: resource.MustParse("1Gi"),
+											},
+										},
+									},
+								},
+							},
+						},
+						WorkerTemplate: corev1.PodTemplateSpec{
+							Spec: corev1.PodSpec{
+								Containers: []corev1.Container{
+									{
+										Resources: corev1.ResourceRequirements{
+											Requests: corev1.ResourceList{
+												corev1.ResourceCPU:    resource.MustParse("2"),
+												corev1.ResourceMemory: resource.MustParse("2Gi"),
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			want: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("5"),   // 1 (leader) + 2 * 2 (workers)
+				corev1.ResourceMemory: resource.MustParse("5Gi"), // 1 (leader) + 2 * 2 (workers)
+			},
+		},
+		{
+			name: "only worker template specified",
+			lws: &leaderworkerset.LeaderWorkerSet{
+				Spec: leaderworkerset.LeaderWorkerSetSpec{
+					LeaderWorkerTemplate: leaderworkerset.LeaderWorkerTemplate{
+						Size: ptr.To[int32](3),
+						WorkerTemplate: corev1.PodTemplateSpec{
+							Spec: corev1.PodSpec{
+								Containers: []corev1.Container{
+									{
+										Resources: corev1.ResourceRequirements{
+											Requests: corev1.ResourceList{
+												corev1.ResourceCPU:    resource.MustParse("2"),
+												corev1.ResourceMemory: resource.MustParse("2Gi"),
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			want: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("6"),   // 2 (leader) + 2 * 2 (workers)
+				corev1.ResourceMemory: resource.MustParse("6Gi"), // 2 (leader) + 2 * 2 (workers)
+			},
+		},
+		{
+			name: "size is 1",
+			lws: &leaderworkerset.LeaderWorkerSet{
+				Spec: leaderworkerset.LeaderWorkerSetSpec{
+					LeaderWorkerTemplate: leaderworkerset.LeaderWorkerTemplate{
+						Size: ptr.To[int32](1),
+						LeaderTemplate: &corev1.PodTemplateSpec{
+							Spec: corev1.PodSpec{
+								Containers: []corev1.Container{
+									{
+										Resources: corev1.ResourceRequirements{
+											Requests: corev1.ResourceList{
+												corev1.ResourceCPU: resource.MustParse("1"),
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			want: corev1.ResourceList{
+				corev1.ResourceCPU: resource.MustParse("1"),
+			},
+		},
+		{
+			name: "no resource requests",
+			lws: &leaderworkerset.LeaderWorkerSet{
+				Spec: leaderworkerset.LeaderWorkerSetSpec{
+					LeaderWorkerTemplate: leaderworkerset.LeaderWorkerTemplate{
+						Size: ptr.To[int32](3),
+						WorkerTemplate: corev1.PodTemplateSpec{
+							Spec: corev1.PodSpec{
+								Containers: []corev1.Container{{}},
+							},
+						},
+					},
+				},
+			},
+			want: corev1.ResourceList{},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := CalculatePGMinResources(tc.lws)
+			if diff := cmp.Diff(tc.want, got); diff != "" {
+				t.Errorf("CalculatePGMinResources() mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
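The test table above pins down the arithmetic of `CalculatePGMinResources` (leader requests plus size-1 copies of the worker requests, with the worker template standing in when no leader template is set), but the function itself is not part of this diff. Here is a minimal sketch consistent with those cases, assuming the signature used in the test; the real implementation in `pkg/utils` may differ in naming and edge-case handling.

```go
package utils

import (
	corev1 "k8s.io/api/core/v1"

	leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
)

// podRequests sums the resource requests of all containers in a pod template.
func podRequests(tmpl *corev1.PodTemplateSpec) corev1.ResourceList {
	total := corev1.ResourceList{}
	for _, c := range tmpl.Spec.Containers {
		for name, q := range c.Resources.Requests {
			sum := total[name]
			sum.Add(q)
			total[name] = sum
		}
	}
	return total
}

// CalculatePGMinResources returns the minimum resources a PodGroup needs for
// one replica group: the leader's requests plus (size-1) copies of a worker's
// requests. When no leader template is set, the worker template is used for
// the leader as well.
func CalculatePGMinResources(lws *leaderworkerset.LeaderWorkerSet) corev1.ResourceList {
	size := int(*lws.Spec.LeaderWorkerTemplate.Size)

	leaderTmpl := &lws.Spec.LeaderWorkerTemplate.WorkerTemplate
	if lws.Spec.LeaderWorkerTemplate.LeaderTemplate != nil {
		leaderTmpl = lws.Spec.LeaderWorkerTemplate.LeaderTemplate
	}

	total := podRequests(leaderTmpl)
	workerReq := podRequests(&lws.Spec.LeaderWorkerTemplate.WorkerTemplate)
	for i := 0; i < size-1; i++ {
		for name, q := range workerReq {
			sum := total[name]
			sum.Add(q)
			total[name] = sum
		}
	}
	return total
}
```

Under these assumptions, the first test case works out to 1 CPU (leader) plus 2 × 2 CPU (workers) = 5 CPU, matching the expected `ResourceList`.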