Skip to content

Commit f9c67b9

Browse files
authored
validate version value for dra attributeSelectors (#638)
Signed-off-by: Varun Ramachandra Sekar <[email protected]>
1 parent c2e82f0 commit f9c67b9

File tree

8 files changed

+330
-12
lines changed

8 files changed

+330
-12
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ toolchain go1.24.1
77
require (
88
github.com/Masterminds/sprig/v3 v3.3.0
99
github.com/NVIDIA/k8s-test-infra v0.0.0-20240806103558-2d7411125519
10+
github.com/blang/semver/v4 v4.0.0
1011
github.com/go-git/go-git/v5 v5.12.0
1112
github.com/go-logr/logr v1.4.2
1213
github.com/google/cel-go v0.23.2
@@ -60,7 +61,6 @@ require (
6061
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
6162
github.com/aws/aws-sdk-go v1.55.6 // indirect
6263
github.com/beorn7/perks v1.0.1 // indirect
63-
github.com/blang/semver/v4 v4.0.0 // indirect
6464
github.com/blendle/zapdriver v1.3.1 // indirect
6565
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
6666
github.com/cespare/xxhash/v2 v2.3.0 // indirect

internal/controller/platform/kserve/nimservice.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"strings"
2424
"time"
2525

26+
"github.com/blang/semver/v4"
2627
"github.com/go-logr/logr"
2728
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
2829
kserveconstants "github.com/kserve/kserve/pkg/constants"
@@ -830,6 +831,26 @@ func (r *NIMServiceReconciler) validateDRAResources(ctx context.Context, nimServ
830831
}
831832
resourceClaimNames[*resource.ResourceClaimName] = true
832833
}
834+
835+
// Check AttributeSelector version values.
836+
for idx, resource := range nimService.Spec.DRAResources {
837+
if resource.ClaimCreationSpec == nil {
838+
continue
839+
}
840+
for deviceIdx, device := range resource.ClaimCreationSpec.Devices {
841+
for attributeIdx, attribute := range device.AttributeSelectors {
842+
if attribute.Value.VersionValue == nil {
843+
continue
844+
}
845+
_, err := semver.Parse(*attribute.Value.VersionValue)
846+
if err != nil {
847+
msg := fmt.Sprintf("spec.draResources[%d].claimCreationSpec.devices[%d].attributeSelectors[%d].value.versionValue.version: invalid version %q: %v", idx, deviceIdx, attributeIdx, *attribute.Value.VersionValue, err)
848+
logger.Error(err, msg, "nimService", nimService.Name)
849+
return false, msg, nil
850+
}
851+
}
852+
}
853+
}
833854
return true, "", nil
834855
}
835856

internal/controller/platform/standalone/nimservice.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"strings"
2424
"time"
2525

26+
"github.com/blang/semver/v4"
2627
"github.com/go-logr/logr"
2728
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
2829
appsv1 "k8s.io/api/apps/v1"
@@ -148,6 +149,25 @@ func (r *NIMServiceReconciler) validateDRAResources(ctx context.Context, nimServ
148149
resourceClaimNames[*resource.ResourceClaimName] = true
149150
}
150151

152+
// Check AttributeSelector version values.
153+
for idx, resource := range nimService.Spec.DRAResources {
154+
if resource.ClaimCreationSpec == nil {
155+
continue
156+
}
157+
for deviceIdx, device := range resource.ClaimCreationSpec.Devices {
158+
for attributeIdx, attribute := range device.AttributeSelectors {
159+
if attribute.Value.VersionValue == nil {
160+
continue
161+
}
162+
_, err := semver.Parse(*attribute.Value.VersionValue)
163+
if err != nil {
164+
msg := fmt.Sprintf("spec.draResources[%d].claimCreationSpec.devices[%d].attributeSelectors[%d].value.versionValue.version: invalid version %q: %v", idx, deviceIdx, attributeIdx, *attribute.Value.VersionValue, err)
165+
logger.Error(err, msg, "nimService", nimService.Name)
166+
return false, msg, nil
167+
}
168+
}
169+
}
170+
}
151171
return true, "", nil
152172
}
153173

internal/controller/platform/standalone/nimservice_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
145145
Expect(monitoringv1.AddToScheme(scheme)).To(Succeed())
146146
Expect(lwsv1.AddToScheme(scheme)).To(Succeed())
147147
Expect(gatewayv1.Install(scheme)).To(Succeed())
148+
Expect(resourcev1beta2.AddToScheme(scheme)).To(Succeed())
148149

149150
client = fake.NewClientBuilder().WithScheme(scheme).
150151
WithStatusSubresource(&appsv1alpha1.NIMService{}).
@@ -756,6 +757,87 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
756757
Expect(failedCondition.Reason).To(Equal(conditions.ReasonDRAResourcesUnsupported))
757758
Expect(failedCondition.Message).To(Equal("spec.draResources[1].resourceClaimName: duplicate resource claim name: 'test-resource-claim'"))
758759
})
760+
761+
It("should mark NIMService as failed when ClaimCreationSpec.Devices[].AttributeSelectors version value is invalid", func() {
762+
nimService.Spec.DRAResources = []appsv1alpha1.DRAResource{
763+
{
764+
ClaimCreationSpec: &appsv1alpha1.DRAClaimCreationSpec{
765+
Devices: []appsv1alpha1.DRADeviceSpec{
766+
{
767+
Name: "test-device",
768+
Count: 1,
769+
DriverName: "gpu.nvidia.com",
770+
DeviceClassName: "gpu.nvidia.com",
771+
AttributeSelectors: []appsv1alpha1.DRADeviceAttributeSelector{
772+
{
773+
Key: "testKey",
774+
Op: "GreaterThan",
775+
Value: &appsv1alpha1.DRADeviceAttributeSelectorValue{
776+
VersionValue: ptr.To("550.127.08"),
777+
},
778+
},
779+
},
780+
},
781+
},
782+
},
783+
},
784+
}
785+
nimServiceKey := types.NamespacedName{Name: nimService.Name, Namespace: nimService.Namespace}
786+
err := client.Create(context.TODO(), nimService)
787+
Expect(err).NotTo(HaveOccurred())
788+
789+
_, err = reconciler.reconcileNIMService(context.TODO(), nimService)
790+
Expect(err).NotTo(HaveOccurred())
791+
792+
obj := &appsv1alpha1.NIMService{}
793+
err = client.Get(context.TODO(), nimServiceKey, obj)
794+
Expect(err).NotTo(HaveOccurred())
795+
Expect(obj.Status.State).To(Equal(appsv1alpha1.NIMServiceStatusFailed))
796+
failedCondition := getCondition(obj, conditions.Failed)
797+
Expect(failedCondition).NotTo(BeNil())
798+
Expect(failedCondition.Status).To(Equal(metav1.ConditionTrue))
799+
Expect(failedCondition.Reason).To(Equal(conditions.ReasonDRAResourcesUnsupported))
800+
Expect(failedCondition.Message).To(ContainSubstring("spec.draResources[0].claimCreationSpec.devices[0].attributeSelectors[0].value.versionValue.version: invalid version \"550.127.08\":"))
801+
})
802+
803+
It("should succeed with valid ClaimCreationSpec", func() {
804+
nimService.Spec.DRAResources = []appsv1alpha1.DRAResource{
805+
{
806+
ClaimCreationSpec: &appsv1alpha1.DRAClaimCreationSpec{
807+
Devices: []appsv1alpha1.DRADeviceSpec{
808+
{
809+
Name: "test-device",
810+
Count: 1,
811+
DriverName: "gpu.nvidia.com",
812+
DeviceClassName: "gpu.nvidia.com",
813+
AttributeSelectors: []appsv1alpha1.DRADeviceAttributeSelector{
814+
{
815+
Key: "testKey",
816+
Op: "GreaterThan",
817+
Value: &appsv1alpha1.DRADeviceAttributeSelectorValue{
818+
VersionValue: ptr.To("550.127.8"),
819+
},
820+
},
821+
},
822+
},
823+
},
824+
},
825+
},
826+
}
827+
nimServiceKey := types.NamespacedName{Name: nimService.Name, Namespace: nimService.Namespace}
828+
err := client.Create(context.TODO(), nimService)
829+
Expect(err).NotTo(HaveOccurred())
830+
831+
_, err = reconciler.reconcileNIMService(context.TODO(), nimService)
832+
Expect(err).NotTo(HaveOccurred())
833+
834+
obj := &appsv1alpha1.NIMService{}
835+
err = client.Get(context.TODO(), nimServiceKey, obj)
836+
Expect(err).NotTo(HaveOccurred())
837+
Expect(obj.Status.State).NotTo(Equal(appsv1alpha1.NIMServiceStatusFailed))
838+
failedCondition := getCondition(obj, conditions.Failed)
839+
Expect(failedCondition.Status).To(Equal(metav1.ConditionFalse))
840+
})
759841
})
760842

761843
It("should delete Deployment when the NIMService is deleted", func() {

internal/shared/resourceclaims.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,21 @@ func ShouldCreateDRAResource(resource appsv1alpha1.DRAResource) bool {
199199

200200
func GetDRADeviceCELExpressions(device appsv1alpha1.DRADeviceSpec) ([]string, error) {
201201
celExpressions := make([]string, 0)
202-
for _, expr := range device.CELExpressions {
203-
err := k8sutilcel.ValidateExpr(expr)
204-
if err != nil {
205-
return nil, err
202+
celExpressions = append(celExpressions, fmt.Sprintf("device.driver == %q", device.DriverName))
203+
if len(device.CELExpressions) > 0 {
204+
if len(device.AttributeSelectors) > 0 || len(device.CapacitySelectors) > 0 {
205+
return nil, fmt.Errorf("CELExpressions must not be set if attributeSelectors or capacitySelectors are set")
206+
}
207+
for _, expr := range device.CELExpressions {
208+
err := k8sutilcel.ValidateExpr(expr)
209+
if err != nil {
210+
return nil, err
211+
}
212+
celExpressions = append(celExpressions, expr)
206213
}
207-
celExpressions = append(celExpressions, expr)
214+
return celExpressions, nil
208215
}
209-
celExpressions = append(celExpressions, fmt.Sprintf("device.driver == %q", device.DriverName))
216+
210217
for _, selector := range device.AttributeSelectors {
211218
expr, err := selector.GetCELExpression(device.DriverName)
212219
if err != nil {

0 commit comments

Comments
 (0)