Skip to content

Commit 4b2a410

Browse files
committed
Conformance: Adds Data Parallelism Test
Signed-off-by: Daneyon Hansen <[email protected]>
1 parent d788a2c commit 4b2a410

File tree

5 files changed

+483
-8
lines changed

5 files changed

+483
-8
lines changed

conformance/resources/base.yaml

Lines changed: 205 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ spec:
200200
terminationGracePeriodSeconds: 130
201201
containers:
202202
- name: epp
203-
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
203+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251023-d788a2c
204204
imagePullPolicy: Always
205205
args:
206206
- --pool-name
@@ -298,7 +298,7 @@ spec:
298298
terminationGracePeriodSeconds: 130
299299
containers:
300300
- name: epp
301-
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
301+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251023-d788a2c
302302
imagePullPolicy: Always
303303
args:
304304
- --pool-name
@@ -340,6 +340,209 @@ spec:
340340
configMap:
341341
name: plugins-config
342342
---
343+
# --- Data Parallelism (DP) backend Deployment: 3 pods, each listening on three ports to simulate ranks ---
344+
apiVersion: apps/v1
345+
kind: Deployment
346+
metadata:
347+
name: dp-inference-model-server-deployment
348+
namespace: inference-conformance-app-backend
349+
labels:
350+
app: dp-inference-model-server
351+
spec:
352+
replicas: 3
353+
selector:
354+
matchLabels:
355+
app: dp-inference-model-server
356+
template:
357+
metadata:
358+
labels:
359+
app: dp-inference-model-server
360+
spec:
361+
containers:
362+
- name: echoserver-3000
363+
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
364+
ports:
365+
- containerPort: 3000
366+
readinessProbe:
367+
httpGet:
368+
path: /
369+
port: 3000
370+
initialDelaySeconds: 3
371+
periodSeconds: 5
372+
failureThreshold: 2
373+
env:
374+
- name: HTTP_PORT # Default port for HTTP echo server
375+
value: "3000"
376+
- name: H2C_PORT # Default port for H2C echo server
377+
value: "3001"
378+
- name: POD_NAME
379+
valueFrom:
380+
fieldRef:
381+
fieldPath: metadata.name
382+
- name: NAMESPACE
383+
valueFrom:
384+
fieldRef:
385+
fieldPath: metadata.namespace
386+
- name: POD_IP
387+
valueFrom:
388+
fieldRef:
389+
fieldPath: status.podIP
390+
- name: echoserver-3002
391+
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
392+
ports:
393+
- containerPort: 3002
394+
readinessProbe:
395+
httpGet:
396+
path: /
397+
port: 3002
398+
initialDelaySeconds: 3
399+
periodSeconds: 5
400+
failureThreshold: 2
401+
env:
402+
- name: HTTP_PORT
403+
value: "3002"
404+
- name: H2C_PORT
405+
value: "3003"
406+
- name: POD_NAME
407+
valueFrom:
408+
fieldRef:
409+
fieldPath: metadata.name
410+
- name: NAMESPACE
411+
valueFrom:
412+
fieldRef:
413+
fieldPath: metadata.namespace
414+
- name: POD_IP
415+
valueFrom:
416+
fieldRef:
417+
fieldPath: status.podIP
418+
- name: echoserver-3004
419+
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
420+
ports:
421+
- containerPort: 3004
422+
readinessProbe:
423+
httpGet:
424+
path: /
425+
port: 3004
426+
initialDelaySeconds: 3
427+
periodSeconds: 5
428+
failureThreshold: 2
429+
env:
430+
- name: HTTP_PORT
431+
value: "3004"
432+
- name: H2C_PORT
433+
value: "3005"
434+
- name: POD_NAME
435+
valueFrom:
436+
fieldRef:
437+
fieldPath: metadata.name
438+
- name: NAMESPACE
439+
valueFrom:
440+
fieldRef:
441+
fieldPath: metadata.namespace
442+
- name: POD_IP
443+
valueFrom:
444+
fieldRef:
445+
fieldPath: status.podIP
446+
---
447+
# --- Data Parallelism (DP) InferencePool Definition ---
448+
apiVersion: inference.networking.k8s.io/v1
449+
kind: InferencePool
450+
metadata:
451+
name: dp-inference-pool
452+
namespace: inference-conformance-app-backend
453+
spec:
454+
selector:
455+
matchLabels:
456+
app: dp-inference-model-server
457+
targetPorts:
458+
- number: 3000
459+
- number: 3002
460+
- number: 3004
461+
endpointPickerRef:
462+
name: dp-endpoint-picker-svc
463+
port:
464+
number: 9002
465+
---
466+
# --- Data Parallelism (DP) Conformance EPP Service Definition ---
467+
apiVersion: v1
468+
kind: Service
469+
metadata:
470+
name: dp-endpoint-picker-svc
471+
namespace: inference-conformance-app-backend
472+
spec:
473+
selector:
474+
app: dp-app-backend-epp
475+
ports:
476+
- protocol: TCP
477+
port: 9002
478+
targetPort: 9002
479+
appProtocol: http2
480+
type: ClusterIP
481+
---
482+
# --- Data Parallelism (DP) Conformance EPP Deployment ---
483+
apiVersion: apps/v1
484+
kind: Deployment
485+
metadata:
486+
name: dp-app-endpoint-picker
487+
namespace: inference-conformance-app-backend
488+
labels:
489+
app: dp-app-backend-epp
490+
spec:
491+
replicas: 1
492+
selector:
493+
matchLabels:
494+
app: dp-app-backend-epp
495+
template:
496+
metadata:
497+
labels:
498+
app: dp-app-backend-epp
499+
spec:
500+
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
501+
terminationGracePeriodSeconds: 130
502+
containers:
503+
- name: epp
504+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251023-d788a2c
505+
imagePullPolicy: Always
506+
args:
507+
- --pool-name
508+
- "dp-inference-pool"
509+
- --pool-namespace
510+
- "inference-conformance-app-backend"
511+
- --v
512+
- "4"
513+
- --zap-encoder
514+
- "json"
515+
- --grpc-port
516+
- "9002"
517+
- --grpc-health-port
518+
- "9003"
519+
- "--config-file"
520+
- "/config/conformance-plugins.yaml"
521+
ports:
522+
- containerPort: 9002
523+
- containerPort: 9003
524+
- name: metrics
525+
containerPort: 9090
526+
livenessProbe:
527+
grpc:
528+
port: 9003
529+
service: inference-extension
530+
initialDelaySeconds: 5
531+
periodSeconds: 10
532+
readinessProbe:
533+
grpc:
534+
port: 9003
535+
service: inference-extension
536+
initialDelaySeconds: 5
537+
periodSeconds: 10
538+
volumeMounts:
539+
- name: plugins-config-volume
540+
mountPath: "/config"
541+
volumes:
542+
- name: plugins-config-volume
543+
configMap:
544+
name: plugins-config
545+
---
343546
apiVersion: v1
344547
kind: ConfigMap
345548
metadata:

0 commit comments

Comments
 (0)