Skip to content

Commit 1b8ba9b

Browse files
committed
add 235b model testing yamls
Signed-off-by: Ning Wang <[email protected]>
1 parent 7a09c2b commit 1b8ba9b

File tree

7 files changed

+546
-29
lines changed

7 files changed

+546
-29
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
---
# NodePort Service for the qwen3-235b model pods.
# Traffic arriving on any node at port 30010 (or in-cluster on port 8000)
# is forwarded to port 8000 of pods carrying the selector label below.
#
# NOTE(review): the selector matches every pod labelled
# model.aibrix.ai/name=qwen3-235b. Pods that serve on a port other than 8000
# (e.g. prefill/decode pods listening on 30000) would not answer through this
# Service -- confirm only the intended deployment is live when it is used.
apiVersion: v1
kind: Service
metadata:
  name: qwen3-235b-service
  namespace: default
spec:
  type: NodePort
  selector:
    model.aibrix.ai/name: qwen3-235b
  ports:
    - port: 8000
      targetPort: 8000
      nodePort: 30010
      protocol: TCP

test/regression/v0.4.0/sglang/8b-service.yaml

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,3 @@ spec:
1212
targetPort: 8000
1313
nodePort: 30008
1414
type: NodePort
15-
16-
17-
apiVersion: v1
18-
kind: Service
19-
metadata:
20-
name: llama2-7b
21-
namespace: default
22-
labels:
23-
prometheus-discovery: "true"
24-
annotations:
25-
prometheus.io/scrape: "true"
26-
prometheus.io/path: "/metrics"
27-
prometheus.io/port: "8000"
28-
spec:
29-
selector:
30-
model.aibrix.ai/name: "llama2-7b"
31-
ports:
32-
- protocol: TCP
33-
name: metrics
34-
port: 8000
35-
targetPort: 8000
36-
nodePort: 30081
37-
type: NodePort
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
---
# StormService running Qwen3-235B-A22B on SGLang with prefill/decode
# disaggregation: one prefill role and one decode role, one replica each,
# each launched with --tp-size 8 and limited to 8 GPUs + 8 RDMA devices.
#
# The two role templates are intentionally identical except for the role /
# container name and the --disaggregation-mode value.
apiVersion: orchestration.aibrix.ai/v1alpha1
kind: StormService
metadata:
  name: aibrix-router-1p1d-tp8
  namespace: default
spec:
  replicas: 1
  stateful: true
  updateStrategy:
    type: InPlaceUpdate
  selector:
    matchLabels:
      app: aibrix-router-1p1d-tp8
  template:
    metadata:
      labels:
        app: aibrix-router-1p1d-tp8
    spec:
      roles:
        # ---- prefill role -------------------------------------------------
        - name: prefill
          replicas: 1
          stateful: true
          template:
            metadata:
              labels:
                model.aibrix.ai/name: qwen3-235b
                # Quoted so the label value stays a string; matches --port below.
                model.aibrix.ai/port: "30000"
                model.aibrix.ai/engine: sglang
              annotations:
                # Eight "rdma" CNI attachments, matching the rdma limit of "8"
                # below (presumably one RDMA interface per GPU -- confirm).
                k8s.volcengine.com/pod-networks: |
                  [
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    }
                  ]
            spec:
              containers:
                - name: prefill
                  image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
                  # Run through a shell so the multi-line command with
                  # backslash continuations is interpreted as one invocation.
                  command:
                    - "sh"
                    - "-c"
                  args:
                    - |
                      python3 -m sglang.launch_server \
                        --model-path /models/Qwen3-235B-A22B \
                        --served-model-name qwen3-235b \
                        --host 0.0.0.0 \
                        --port 30000 \
                        --disaggregation-mode prefill \
                        --disaggregation-transfer-backend=mooncake \
                        --tp-size 8 \
                        --trust-remote-code \
                        --mem-fraction-static 0.8 \
                        --log-level debug
                  env:
                    # Pin Gloo and NCCL bootstrap sockets to eth0.
                    - name: GLOO_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_SOCKET_IFNAME
                      value: eth0
                    # "0" leaves NCCL's InfiniBand/RoCE transport enabled.
                    - name: NCCL_IB_DISABLE
                      value: "0"
                    # NOTE(review): GID index 7 looks cluster-specific -- confirm.
                    - name: NCCL_IB_GID_INDEX
                      value: "7"
                    - name: NCCL_DEBUG
                      value: "INFO"
                  volumeMounts:
                    # Model weights from the host, mounted read-only.
                    - name: model-vol
                      mountPath: /models
                      readOnly: true
                    # Memory-backed /dev/shm (see the shared-mem volume).
                    - name: shared-mem
                      mountPath: /dev/shm
                  resources:
                    limits:
                      nvidia.com/gpu: 8
                      vke.volcengine.com/rdma: "8"
                  securityContext:
                    capabilities:
                      add:
                        # Presumably required to lock memory for RDMA -- confirm.
                        - IPC_LOCK
              volumes:
                # type: Directory requires /data01/models to already exist on the node.
                - name: model-vol
                  hostPath:
                    path: /data01/models
                    type: Directory
                - name: shared-mem
                  emptyDir:
                    medium: Memory
        # ---- decode role --------------------------------------------------
        - name: decode
          replicas: 1
          stateful: true
          template:
            metadata:
              labels:
                model.aibrix.ai/name: qwen3-235b
                # Quoted so the label value stays a string; matches --port below.
                model.aibrix.ai/port: "30000"
                model.aibrix.ai/engine: sglang
              annotations:
                # Eight "rdma" CNI attachments, matching the rdma limit of "8"
                # below (presumably one RDMA interface per GPU -- confirm).
                k8s.volcengine.com/pod-networks: |
                  [
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    }
                  ]
            spec:
              containers:
                - name: decode
                  image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
                  # Run through a shell so the multi-line command with
                  # backslash continuations is interpreted as one invocation.
                  command:
                    - "sh"
                    - "-c"
                  args:
                    - |
                      python3 -m sglang.launch_server \
                        --model-path /models/Qwen3-235B-A22B \
                        --served-model-name qwen3-235b \
                        --host 0.0.0.0 \
                        --port 30000 \
                        --disaggregation-mode decode \
                        --disaggregation-transfer-backend=mooncake \
                        --tp-size 8 \
                        --trust-remote-code \
                        --mem-fraction-static 0.8 \
                        --log-level debug
                  env:
                    # Pin Gloo and NCCL bootstrap sockets to eth0.
                    - name: GLOO_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_SOCKET_IFNAME
                      value: eth0
                    # "0" leaves NCCL's InfiniBand/RoCE transport enabled.
                    - name: NCCL_IB_DISABLE
                      value: "0"
                    # NOTE(review): GID index 7 looks cluster-specific -- confirm.
                    - name: NCCL_IB_GID_INDEX
                      value: "7"
                    - name: NCCL_DEBUG
                      value: "INFO"
                  volumeMounts:
                    # Model weights from the host, mounted read-only.
                    - name: model-vol
                      mountPath: /models
                      readOnly: true
                    # Memory-backed /dev/shm (see the shared-mem volume).
                    - name: shared-mem
                      mountPath: /dev/shm
                  resources:
                    limits:
                      nvidia.com/gpu: 8
                      vke.volcengine.com/rdma: "8"
                  securityContext:
                    capabilities:
                      add:
                        # Presumably required to lock memory for RDMA -- confirm.
                        - IPC_LOCK
              volumes:
                # type: Directory requires /data01/models to already exist on the node.
                - name: model-vol
                  hostPath:
                    path: /data01/models
                    type: Directory
                - name: shared-mem
                  emptyDir:
                    medium: Memory

test/regression/v0.4.0/sglang/qwen-235b/sglang-base.yaml

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,50 @@ spec:
1515
metadata:
1616
labels:
1717
model.aibrix.ai/name: qwen3-235b
18+
annotations:
19+
k8s.volcengine.com/pod-networks: |
20+
[
21+
{
22+
"cniConf":{
23+
"name":"rdma"
24+
}
25+
},
26+
{
27+
"cniConf":{
28+
"name":"rdma"
29+
}
30+
},
31+
{
32+
"cniConf":{
33+
"name":"rdma"
34+
}
35+
},
36+
{
37+
"cniConf":{
38+
"name":"rdma"
39+
}
40+
},
41+
{
42+
"cniConf":{
43+
"name":"rdma"
44+
}
45+
},
46+
{
47+
"cniConf":{
48+
"name":"rdma"
49+
}
50+
},
51+
{
52+
"cniConf":{
53+
"name":"rdma"
54+
}
55+
},
56+
{
57+
"cniConf":{
58+
"name":"rdma"
59+
}
60+
}
61+
]
1862
spec:
1963
containers:
2064
- name: sglang-server
@@ -23,17 +67,30 @@ spec:
2367
args:
2468
- |
2569
python3 -m sglang.launch_server \
26-
--model-path models/Qwen3-235B-A22B \
70+
--model-path /models/Qwen3-235B-A22B \
2771
--served-model-name qwen3-235b \
2872
--host 0.0.0.0 \
2973
--port 8000 \
74+
--tp-size 8 \
3075
--trust-remote-code \
3176
--enable-metrics \
3277
--mem-fraction-static 0.8 \
3378
--log-level debug
79+
env:
80+
- name: GLOO_SOCKET_IFNAME
81+
value: eth0
82+
- name: NCCL_SOCKET_IFNAME
83+
value: eth0
84+
- name: NCCL_IB_DISABLE
85+
value: "0"
86+
- name: NCCL_IB_GID_INDEX
87+
value: "7"
88+
- name: NCCL_DEBUG
89+
value: "INFO"
3490
resources:
3591
limits:
36-
nvidia.com/gpu: 1
92+
nvidia.com/gpu: 8
93+
vke.volcengine.com/rdma: "8"
3794
volumeMounts:
3895
- name: model-vol
3996
mountPath: /models

test/regression/v0.4.0/sglang/qwen-235b/sglang-router-1p1d-tp8.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ spec:
1010
stateful: true
1111
selector:
1212
matchLabels:
13-
app: sglang-router-1p1d-tp8
13+
app: sglang-router-1p1d-tp8
1414
template:
1515
metadata:
1616
labels:
17-
app: sglang-router-1p1d-tp8
17+
app: sglang-router-1p1d-tp8
1818
spec:
1919
roles:
2020
- name: routing
@@ -94,7 +94,7 @@ spec:
9494
args:
9595
- |
9696
python3 -m sglang.launch_server \
97-
--model-path models/Qwen3-235B-A22B \
97+
--model-path /models/Qwen3-235B-A22B \
9898
--served-model-name qwen3-235b \
9999
--host 0.0.0.0 \
100100
--port 30000 \
@@ -194,7 +194,7 @@ spec:
194194
args:
195195
- |
196196
python3 -m sglang.launch_server \
197-
--model-path models/Qwen3-235B-A22B \
197+
--model-path /models/Qwen3-235B-A22B \
198198
--served-model-name qwen3-235b \
199199
--host 0.0.0.0 \
200200
--port 30000 \

0 commit comments

Comments
 (0)