Skip to content

Commit aa3ee3d

Browse files
committed
add aibrix vllm router yaml
Signed-off-by: Ning Wang <[email protected]>
1 parent 0c81257 commit aa3ee3d

File tree

6 files changed

+863
-2
lines changed

6 files changed

+863
-2
lines changed

test/regression/v0.4.0/vllm/1p1d.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ apiVersion: orchestration.aibrix.ai/v1alpha1
22
kind: StormService
33
metadata:
44
name: vllm-1p1d
5+
namespace: default
56
spec:
67
replicas: 1
78
updateStrategy:
@@ -42,7 +43,7 @@ spec:
4243
--port "8000" \
4344
--uvicorn-log-level warning \
4445
--model /models/Qwen3-8B \
45-
--served-model-name qwen3-8B \
46+
--served-model-name qwen3-8b \
4647
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
4748
env:
4849
- name: UCX_TLS
@@ -87,6 +88,8 @@ spec:
8788
- emptyDir:
8889
medium: Memory
8990
name: shared-mem
91+
nodeSelector:
92+
kubernetes.io/hostname: 192.168.0.6
9093
- name: decode
9194
replicas: 1
9295
stateful: true
@@ -113,7 +116,7 @@ spec:
113116
--port "8000" \
114117
--uvicorn-log-level warning \
115118
--model /models/Qwen3-8B \
116-
--served-model-name qwen3-8B \
119+
--served-model-name qwen3-8b \
117120
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
118121
env:
119122
- name: UCX_TLS
@@ -158,3 +161,5 @@ spec:
158161
- emptyDir:
159162
medium: Memory
160163
name: shared-mem
164+
nodeSelector:
165+
kubernetes.io/hostname: 192.168.0.6
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
apiVersion: orchestration.aibrix.ai/v1alpha1
2+
kind: StormService
3+
metadata:
4+
name: vllm-2p2d-tp2
5+
namespace: default
6+
spec:
7+
replicas: 1
8+
updateStrategy:
9+
type: InPlaceUpdate
10+
stateful: true
11+
selector:
12+
matchLabels:
13+
app: vllm-2p2d-tp2
14+
template:
15+
metadata:
16+
labels:
17+
app: vllm-2p2d-tp2
18+
spec:
19+
roles:
20+
- name: prefill
21+
replicas: 2
22+
stateful: true
23+
template:
24+
metadata:
25+
annotations:
26+
k8s.volcengine.com/pod-networks: |
27+
[
28+
{
29+
"cniConf":{
30+
"name":"rdma"
31+
}
32+
},
33+
{
34+
"cniConf":{
35+
"name":"rdma"
36+
}
37+
}
38+
]
39+
spec:
40+
containers:
41+
- name: prefill
42+
image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
43+
command: ["sh", "-c"]
44+
args:
45+
- |
46+
python3 -m vllm.entrypoints.openai.api_server \
47+
--host "0.0.0.0" \
48+
--port "8000" \
49+
--uvicorn-log-level warning \
50+
--model /models/Qwen3-32B \
51+
--served-model-name qwen3-32b \
52+
--tensor-parallel-size 2 \
53+
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
54+
env:
55+
- name: UCX_TLS
56+
value: cuda_ipc,cuda_copy,tcp
57+
- name: VLLM_SERVER_DEV_MODE
58+
value: "1"
59+
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
60+
value: "5558"
61+
- name: VLLM_WORKER_MULTIPROC_METHOD
62+
value: spawn
63+
- name: VLLM_ENABLE_V1_MULTIPROCESSING
64+
value: "0"
65+
- name: GLOO_SOCKET_IFNAME
66+
value: eth0
67+
- name: NCCL_SOCKET_IFNAME
68+
value: eth0
69+
- name: NCCL_IB_DISABLE
70+
value: "0"
71+
- name: NCCL_IB_GID_INDEX
72+
value: "7"
73+
- name: NCCL_DEBUG
74+
value: "INFO"
75+
volumeMounts:
76+
- name: model-vol
77+
mountPath: /models
78+
readOnly: true
79+
- mountPath: /dev/shm
80+
name: shared-mem
81+
resources:
82+
limits:
83+
nvidia.com/gpu: 2
84+
vke.volcengine.com/rdma: "2"
85+
securityContext:
86+
capabilities:
87+
add:
88+
- IPC_LOCK
89+
volumes:
90+
- name: model-vol
91+
hostPath:
92+
path: /data01/models
93+
type: Directory
94+
- emptyDir:
95+
medium: Memory
96+
name: shared-mem
97+
- name: decode
98+
replicas: 2
99+
stateful: true
100+
template:
101+
metadata:
102+
annotations:
103+
k8s.volcengine.com/pod-networks: |
104+
[
105+
{
106+
"cniConf":{
107+
"name":"rdma"
108+
}
109+
},
110+
{
111+
"cniConf":{
112+
"name":"rdma"
113+
}
114+
}
115+
]
116+
spec:
117+
containers:
118+
- name: decode
119+
image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
120+
command: ["sh", "-c"]
121+
args:
122+
- |
123+
python3 -m vllm.entrypoints.openai.api_server \
124+
--host "0.0.0.0" \
125+
--port "8000" \
126+
--uvicorn-log-level warning \
127+
--model /models/Qwen3-32B \
128+
--served-model-name qwen3-32b \
129+
--tensor-parallel-size 2 \
130+
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
131+
env:
132+
- name: UCX_TLS
133+
value: cuda_ipc,cuda_copy,tcp
134+
- name: VLLM_SERVER_DEV_MODE
135+
value: "1"
136+
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
137+
value: "5558"
138+
- name: VLLM_WORKER_MULTIPROC_METHOD
139+
value: spawn
140+
- name: VLLM_ENABLE_V1_MULTIPROCESSING
141+
value: "0"
142+
- name: GLOO_SOCKET_IFNAME
143+
value: eth0
144+
- name: NCCL_SOCKET_IFNAME
145+
value: eth0
146+
- name: NCCL_IB_DISABLE
147+
value: "0"
148+
- name: NCCL_IB_GID_INDEX
149+
value: "7"
150+
- name: NCCL_DEBUG
151+
value: "INFO"
152+
volumeMounts:
153+
- name: model-vol
154+
mountPath: /models
155+
readOnly: true
156+
- mountPath: /dev/shm
157+
name: shared-mem
158+
resources:
159+
limits:
160+
nvidia.com/gpu: 2
161+
vke.volcengine.com/rdma: "2"
162+
securityContext:
163+
capabilities:
164+
add:
165+
- IPC_LOCK
166+
volumes:
167+
- name: model-vol
168+
hostPath:
169+
path: /data01/models
170+
type: Directory
171+
- emptyDir:
172+
medium: Memory
173+
name: shared-mem
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
apiVersion: orchestration.aibrix.ai/v1alpha1
2+
kind: StormService
3+
metadata:
4+
name: vllm-4p3d
5+
namespace: default
6+
spec:
7+
replicas: 1
8+
updateStrategy:
9+
type: InPlaceUpdate
10+
stateful: true
11+
selector:
12+
matchLabels:
13+
app: vllm-4p3d
14+
template:
15+
metadata:
16+
labels:
17+
app: vllm-4p3d
18+
spec:
19+
roles:
20+
- name: prefill
21+
replicas: 4
22+
stateful: true
23+
template:
24+
metadata:
25+
annotations:
26+
k8s.volcengine.com/pod-networks: |
27+
[
28+
{
29+
"cniConf":{
30+
"name":"rdma"
31+
}
32+
}
33+
]
34+
spec:
35+
containers:
36+
- name: prefill
37+
image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
38+
command: ["sh", "-c"]
39+
args:
40+
- |
41+
python3 -m vllm.entrypoints.openai.api_server \
42+
--host "0.0.0.0" \
43+
--port "8000" \
44+
--uvicorn-log-level warning \
45+
--model /models/Qwen3-8B \
46+
--served-model-name qwen3-8b \
47+
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
48+
env:
49+
- name: UCX_TLS
50+
value: cuda_ipc,cuda_copy,tcp
51+
- name: VLLM_SERVER_DEV_MODE
52+
value: "1"
53+
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
54+
value: "5558"
55+
- name: VLLM_WORKER_MULTIPROC_METHOD
56+
value: spawn
57+
- name: VLLM_ENABLE_V1_MULTIPROCESSING
58+
value: "0"
59+
- name: GLOO_SOCKET_IFNAME
60+
value: eth0
61+
- name: NCCL_SOCKET_IFNAME
62+
value: eth0
63+
- name: NCCL_IB_DISABLE
64+
value: "0"
65+
- name: NCCL_IB_GID_INDEX
66+
value: "7"
67+
- name: NCCL_DEBUG
68+
value: "INFO"
69+
volumeMounts:
70+
- name: model-vol
71+
mountPath: /models
72+
readOnly: true
73+
- mountPath: /dev/shm
74+
name: shared-mem
75+
resources:
76+
limits:
77+
nvidia.com/gpu: 1
78+
vke.volcengine.com/rdma: "1"
79+
securityContext:
80+
capabilities:
81+
add:
82+
- IPC_LOCK
83+
volumes:
84+
- name: model-vol
85+
hostPath:
86+
path: /data01/models
87+
type: Directory
88+
- emptyDir:
89+
medium: Memory
90+
name: shared-mem
91+
- name: decode
92+
replicas: 3
93+
stateful: true
94+
template:
95+
metadata:
96+
annotations:
97+
k8s.volcengine.com/pod-networks: |
98+
[
99+
{
100+
"cniConf":{
101+
"name":"rdma"
102+
}
103+
}
104+
]
105+
spec:
106+
containers:
107+
- name: decode
108+
image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
109+
command: ["sh", "-c"]
110+
args:
111+
- |
112+
python3 -m vllm.entrypoints.openai.api_server \
113+
--host "0.0.0.0" \
114+
--port "8000" \
115+
--uvicorn-log-level warning \
116+
--model /models/Qwen3-8B \
117+
--served-model-name qwen3-8b \
118+
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
119+
env:
120+
- name: UCX_TLS
121+
value: cuda_ipc,cuda_copy,tcp
122+
- name: VLLM_SERVER_DEV_MODE
123+
value: "1"
124+
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
125+
value: "5558"
126+
- name: VLLM_WORKER_MULTIPROC_METHOD
127+
value: spawn
128+
- name: VLLM_ENABLE_V1_MULTIPROCESSING
129+
value: "0"
130+
- name: GLOO_SOCKET_IFNAME
131+
value: eth0
132+
- name: NCCL_SOCKET_IFNAME
133+
value: eth0
134+
- name: NCCL_IB_DISABLE
135+
value: "0"
136+
- name: NCCL_IB_GID_INDEX
137+
value: "7"
138+
- name: NCCL_DEBUG
139+
value: "INFO"
140+
volumeMounts:
141+
- name: model-vol
142+
mountPath: /models
143+
readOnly: true
144+
- mountPath: /dev/shm
145+
name: shared-mem
146+
resources:
147+
limits:
148+
nvidia.com/gpu: 1
149+
vke.volcengine.com/rdma: "1"
150+
securityContext:
151+
capabilities:
152+
add:
153+
- IPC_LOCK
154+
volumes:
155+
- name: model-vol
156+
hostPath:
157+
path: /data01/models
158+
type: Directory
159+
- emptyDir:
160+
medium: Memory
161+
name: shared-mem
162+

0 commit comments

Comments
 (0)