Skip to content

Commit 2065f3c

Browse files
committed
add sglang-235b yaml
Signed-off-by: Ning Wang <[email protected]>
1 parent 3432927 commit 2065f3c

File tree

4 files changed

+299
-7
lines changed

4 files changed

+299
-7
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
---
# Single-Deployment SGLang server for Qwen3-235B-A22B, exposed as
# "qwen3-235b" on port 8000. Each replica is one pod running one
# sglang.launch_server process.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    model.aibrix.ai/name: qwen3-235b
    # Quoted so the label value stays a string; must match --port below.
    model.aibrix.ai/port: "8000"
  name: qwen3-235b
  namespace: default
spec:
  replicas: 2
  selector:
    matchLabels:
      model.aibrix.ai/name: qwen3-235b
  template:
    metadata:
      labels:
        model.aibrix.ai/name: qwen3-235b
    spec:
      containers:
        - name: sglang-server
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
          command: ["sh", "-c"]
          args:
            # --model-path is absolute so it resolves to the model-vol mount
            # (/models) regardless of the image's working directory.
            # --tp-size 8 shards the 235B-parameter model across the 8 GPUs
            # requested below; it cannot be loaded on a single GPU.
            # NOTE(review): confirm target nodes expose 8 GPUs per pod.
            - |
              python3 -m sglang.launch_server \
                --model-path /models/Qwen3-235B-A22B \
                --served-model-name qwen3-235b \
                --host 0.0.0.0 \
                --port 8000 \
                --tp-size 8 \
                --trust-remote-code \
                --enable-metrics \
                --mem-fraction-static 0.8 \
                --log-level debug
          resources:
            limits:
              # Must equal --tp-size above.
              nvidia.com/gpu: 8
          volumeMounts:
            - name: model-vol
              mountPath: /models
              readOnly: true
            - mountPath: /dev/shm
              name: shared-mem
          securityContext:
            capabilities:
              add:
                - IPC_LOCK
      volumes:
        # Model weights are read from the node's local disk.
        - name: model-vol
          hostPath:
            path: /data01/models
            type: Directory
        # RAM-backed emptyDir mounted at /dev/shm.
        - emptyDir:
            medium: Memory
          name: shared-mem
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
---
# Prefill/decode-disaggregated SGLang deployment for Qwen3-235B-A22B:
# one router pod, one prefill pod and one decode pod. The prefill and
# decode pods each run with --tp-size 8 and request 8 GPUs plus 8 RDMA
# devices.
apiVersion: orchestration.aibrix.ai/v1alpha1
kind: StormService
metadata:
  name: sglang-router-1p1d-tp8
  namespace: default
spec:
  replicas: 1
  updateStrategy:
    type: InPlaceUpdate
  stateful: true
  selector:
    matchLabels:
      app: sglang-router-1p1d-tp8
  template:
    metadata:
      labels:
        app: sglang-router-1p1d-tp8
    spec:
      roles:
        # Router: discovers prefill/decode pods by label selector and
        # forwards requests to them on port 30000.
        - name: routing
          replicas: 1
          stateful: true
          template:
            spec:
              containers:
                - name: mini-lb
                  image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang-router:v0.1.6
                  command: ["sh", "-c"]
                  args:
                    # $STORM_SERVICE_NAME is expanded by the shell at
                    # container start.
                    # NOTE(review): no env entry sets it in this manifest;
                    # presumably injected by the StormService controller —
                    # confirm.
                    - |
                      python3 -m sglang_router.launch_router \
                        --pd-disaggregation \
                        --host 0.0.0.0 \
                        --policy random \
                        --service-discovery \
                        --service-discovery-port 30000 \
                        --prefill-selector storm-service-name=$STORM_SERVICE_NAME role-name=prefill \
                        --decode-selector storm-service-name=$STORM_SERVICE_NAME role-name=decode \
                        --service-discovery-namespace default
        # Prefill worker.
        - name: prefill
          replicas: 1
          stateful: true
          template:
            metadata:
              annotations:
                # Eight "rdma" network entries, matching the
                # vke.volcengine.com/rdma: "8" limit below.
                k8s.volcengine.com/pod-networks: |
                  [
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    }
                  ]
            spec:
              containers:
                - name: prefill
                  image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
                  command: ["sh", "-c"]
                  args:
                    # --model-path is absolute so it resolves to the
                    # model-vol mount (/models) regardless of the image's
                    # working directory. --port must match the router's
                    # --service-discovery-port.
                    - |
                      python3 -m sglang.launch_server \
                        --model-path /models/Qwen3-235B-A22B \
                        --served-model-name qwen3-235b \
                        --host 0.0.0.0 \
                        --port 30000 \
                        --disaggregation-mode prefill \
                        --disaggregation-transfer-backend=mooncake \
                        --tp-size 8 \
                        --trust-remote-code \
                        --mem-fraction-static 0.8 \
                        --log-level debug
                  env:
                    - name: GLOO_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_SOCKET_IFNAME
                      value: eth0
                    # "0" keeps NCCL's InfiniBand/RoCE transport enabled.
                    - name: NCCL_IB_DISABLE
                      value: "0"
                    - name: NCCL_IB_GID_INDEX
                      value: "7"
                    - name: NCCL_DEBUG
                      value: "INFO"
                  volumeMounts:
                    - name: model-vol
                      mountPath: /models
                      readOnly: true
                    - mountPath: /dev/shm
                      name: shared-mem
                  resources:
                    limits:
                      # GPU count must equal --tp-size above.
                      nvidia.com/gpu: 8
                      vke.volcengine.com/rdma: "8"
                  securityContext:
                    capabilities:
                      add:
                        - IPC_LOCK
              volumes:
                - name: model-vol
                  hostPath:
                    path: /data01/models
                    type: Directory
                - emptyDir:
                    medium: Memory
                  name: shared-mem
        # Decode worker: same pod shape as prefill; only
        # --disaggregation-mode differs.
        - name: decode
          replicas: 1
          stateful: true
          template:
            metadata:
              annotations:
                # Eight "rdma" network entries, matching the
                # vke.volcengine.com/rdma: "8" limit below.
                k8s.volcengine.com/pod-networks: |
                  [
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    },
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    }
                  ]
            spec:
              containers:
                - name: decode
                  image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
                  command: ["sh", "-c"]
                  args:
                    # --model-path is absolute so it resolves to the
                    # model-vol mount (/models) regardless of the image's
                    # working directory. --port must match the router's
                    # --service-discovery-port.
                    - |
                      python3 -m sglang.launch_server \
                        --model-path /models/Qwen3-235B-A22B \
                        --served-model-name qwen3-235b \
                        --host 0.0.0.0 \
                        --port 30000 \
                        --disaggregation-mode decode \
                        --disaggregation-transfer-backend=mooncake \
                        --tp-size 8 \
                        --trust-remote-code \
                        --mem-fraction-static 0.8 \
                        --log-level debug
                  env:
                    - name: GLOO_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_SOCKET_IFNAME
                      value: eth0
                    # "0" keeps NCCL's InfiniBand/RoCE transport enabled.
                    - name: NCCL_IB_DISABLE
                      value: "0"
                    - name: NCCL_IB_GID_INDEX
                      value: "7"
                    - name: NCCL_DEBUG
                      value: "INFO"
                  volumeMounts:
                    - name: model-vol
                      mountPath: /models
                      readOnly: true
                    - mountPath: /dev/shm
                      name: shared-mem
                  resources:
                    limits:
                      # GPU count must equal --tp-size above.
                      nvidia.com/gpu: 8
                      vke.volcengine.com/rdma: "8"
                  securityContext:
                    capabilities:
                      add:
                        - IPC_LOCK
              volumes:
                - name: model-vol
                  hostPath:
                    path: /data01/models
                    type: Directory
                - emptyDir:
                    medium: Memory
                  name: shared-mem

test/regression/v0.4.0/sglang/qwen-32b/sglang-base.yaml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@ apiVersion: apps/v1
22
kind: Deployment
33
metadata:
44
labels:
5-
model.aibrix.ai/name: qwen3-8b
5+
model.aibrix.ai/name: qwen3-32b
66
model.aibrix.ai/port: "8000"
7-
name: qwen3-8b
7+
name: qwen3-32b
88
namespace: default
99
spec:
10-
replicas: 1
10+
replicas: 2
1111
selector:
1212
matchLabels:
13-
model.aibrix.ai/name: qwen3-8b
13+
model.aibrix.ai/name: qwen3-32b
1414
template:
1515
metadata:
1616
labels:
17-
model.aibrix.ai/name: qwen3-8b
17+
model.aibrix.ai/name: qwen3-32b
1818
spec:
1919
containers:
2020
- name: sglang-server
@@ -52,4 +52,3 @@ spec:
5252
- emptyDir:
5353
medium: Memory
5454
name: shared-mem
55-

test/regression/v0.4.0/sglang/qwen-8b/sglang-base.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ metadata:
77
name: qwen3-8b
88
namespace: default
99
spec:
10-
replicas: 1
10+
replicas: 2
1111
selector:
1212
matchLabels:
1313
model.aibrix.ai/name: qwen3-8b

0 commit comments

Comments
 (0)