Skip to content

Commit ffbcb54

Browse files
authored
Add initial v0.4.0 regression test yamls (#1337)
Signed-off-by: Jiaxin Shan <[email protected]>
1 parent 3491bb7 commit ffbcb54

File tree

5 files changed

+589
-0
lines changed

5 files changed

+589
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
---
# Regression test (v0.4.0): DeepSeek-R1-Distill-Llama-8B served by SGLang on a
# single GPU. The three model.aibrix.ai/* labels are read by the AiBrix router
# and must agree across metadata.labels, selector.matchLabels, and the pod
# template labels.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    model.aibrix.ai/name: deepseek-r1-distill-llama-8b
    model.aibrix.ai/engine: sglang
    model.aibrix.ai/port: "8000"
  name: deepseek-r1-distill-llama-8b
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: deepseek-r1-distill-llama-8b
      model.aibrix.ai/engine: sglang
      model.aibrix.ai/port: "8000"
  template:
    metadata:
      labels:
        model.aibrix.ai/name: deepseek-r1-distill-llama-8b
        model.aibrix.ai/engine: sglang
        model.aibrix.ai/port: "8000"
    spec:
      containers:
        - command:
            - python3
            - -m
            - sglang.launch_server
            - --host
            - "0.0.0.0"
            - --port
            - "8000"
            - --model-path
            - /root/models/DeepSeek-R1-Distill-Llama-8B
            - --served-model-name
            - DeepSeek-R1-Distill-Llama-8B
            - --attention-backend
            - flashinfer
            - --enable-metrics
            - --log-level-http
            - "warning"
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
          imagePullPolicy: IfNotPresent
          name: sglang
          ports:
            - containerPort: 8000
              protocol: TCP
          resources:
            limits:
              nvidia.com/gpu: "1"
            requests:
              nvidia.com/gpu: "1"
          volumeMounts:
            - name: model-vol
              mountPath: /root/models
      volumes:
        # Model weights are expected to be pre-staged on the node at
        # /root/models (hostPath, type Directory — pod fails fast if absent).
        - name: model-vol
          hostPath:
            path: /root/models
            type: Directory
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
---
# Regression test (v0.4.0): DeepSeek-R1-Distill-Llama-8B served by the vLLM
# OpenAI-compatible server on a single GPU.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    model.aibrix.ai/name: deepseek-r1-distill-llama-8b # Note: The label value `model.aibrix.ai/name` here must match with the service name.
    model.aibrix.ai/engine: vllm
    model.aibrix.ai/port: "8000"
  name: deepseek-r1-distill-llama-8b
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: deepseek-r1-distill-llama-8b
      model.aibrix.ai/engine: vllm
      model.aibrix.ai/port: "8000"
  template:
    metadata:
      labels:
        model.aibrix.ai/name: deepseek-r1-distill-llama-8b
        model.aibrix.ai/engine: vllm
        model.aibrix.ai/port: "8000"
    spec:
      containers:
        - command:
            - python3
            - -m
            - vllm.entrypoints.openai.api_server
            - --host
            - "0.0.0.0"
            - --port
            - "8000"
            - --uvicorn-log-level
            - warning
            # Model is pulled by its Hugging Face id but exposed under the
            # lowercase served name so it matches the routing labels above.
            - --model
            - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
            - --served-model-name
            - deepseek-r1-distill-llama-8b
            - --max-model-len
            - "12288"
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
          imagePullPolicy: IfNotPresent
          name: vllm-openai
          ports:
            - containerPort: 8000
              protocol: TCP
          resources:
            limits:
              nvidia.com/gpu: "1"
            requests:
              nvidia.com/gpu: "1"
          volumeMounts:
            - name: model-vol
              mountPath: /root/models
      volumes:
        # Node-local model cache (hostPath, type Directory).
        - name: model-vol
          hostPath:
            path: /root/models
            type: Directory
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
---
# Regression test (v0.4.0): prefill/decode disaggregated SGLang serving of
# Qwen3-8B behind the AiBrix router, as an orchestration.aibrix.ai
# StormService with 4 prefill and 3 decode replicas. KV transfer between the
# two roles uses the NIXL backend over RDMA (vke.volcengine.com/rdma resource
# plus the k8s.volcengine.com/pod-networks annotation).
apiVersion: orchestration.aibrix.ai/v1alpha1
kind: StormService
metadata:
  name: sglang-aibrix-router
spec:
  replicas: 1
  updateStrategy:
    type: InPlaceUpdate
  stateful: true
  selector:
    matchLabels:
      app: pool-xpyd
  template:
    metadata:
      labels:
        app: pool-xpyd
    spec:
      roles:
        - name: prefill
          replicas: 4
          stateful: true
          template:
            metadata:
              annotations:
                # Attach the RDMA secondary network to the pod (VKE CNI).
                k8s.volcengine.com/pod-networks: |
                  [
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    }
                  ]
            spec:
              containers:
                - name: prefill
                  image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
                  command: ["sh", "-c"]
                  args:
                    - |
                      python3 -m sglang.launch_server \
                        --model-path /models/Qwen3-8B \
                        --served-model-name qwen3-8b \
                        --host 0.0.0.0 \
                        --port 30000 \
                        --disaggregation-mode prefill \
                        --disaggregation-transfer-backend=nixl \
                        --trust-remote-code \
                        --mem-fraction-static 0.8 \
                        --log-level debug
                  env:
                    - name: GLOO_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_IB_DISABLE
                      value: "0"
                    - name: NCCL_IB_GID_INDEX
                      value: "7"
                    - name: NCCL_DEBUG
                      value: "INFO"
                  volumeMounts:
                    - name: model-vol
                      mountPath: /models
                    # tmpfs-backed /dev/shm for NCCL/torch shared memory.
                    - mountPath: /dev/shm
                      name: shared-mem
                  resources:
                    limits:
                      nvidia.com/gpu: 1
                      vke.volcengine.com/rdma: "1"
                  securityContext:
                    capabilities:
                      add:
                        # Required so the RDMA stack can pin (lock) memory.
                        - IPC_LOCK
              volumes:
                - name: model-vol
                  hostPath:
                    path: /root/models
                    type: Directory
                - emptyDir:
                    medium: Memory
                  name: shared-mem
        - name: decode
          replicas: 3
          stateful: true
          template:
            metadata:
              annotations:
                # Attach the RDMA secondary network to the pod (VKE CNI).
                k8s.volcengine.com/pod-networks: |
                  [
                    {
                      "cniConf":{
                        "name":"rdma"
                      }
                    }
                  ]
            spec:
              containers:
                - name: decode
                  image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
                  command: ["sh", "-c"]
                  args:
                    - |
                      python3 -m sglang.launch_server \
                        --model-path /models/Qwen3-8B \
                        --served-model-name qwen3-8b \
                        --host 0.0.0.0 \
                        --port 30000 \
                        --disaggregation-mode decode \
                        --disaggregation-transfer-backend=nixl \
                        --trust-remote-code \
                        --mem-fraction-static 0.8 \
                        --log-level debug
                  env:
                    - name: GLOO_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_IB_DISABLE
                      value: "0"
                    - name: NCCL_IB_GID_INDEX
                      value: "7"
                    - name: NCCL_DEBUG
                      value: "INFO"
                  volumeMounts:
                    - name: model-vol
                      mountPath: /models
                    # tmpfs-backed /dev/shm for NCCL/torch shared memory.
                    - mountPath: /dev/shm
                      name: shared-mem
                  resources:
                    limits:
                      nvidia.com/gpu: 1
                      vke.volcengine.com/rdma: "1"
                  securityContext:
                    capabilities:
                      add:
                        # Required so the RDMA stack can pin (lock) memory.
                        - IPC_LOCK
              volumes:
                - name: model-vol
                  hostPath:
                    path: /root/models
                    type: Directory
                - emptyDir:
                    medium: Memory
                  name: shared-mem

0 commit comments

Comments
 (0)