Skip to content

Commit 3432927

Browse files
committed
Add base YAML manifests without PD (prefill/decode disaggregation)
Signed-off-by: Ning Wang <[email protected]>
1 parent 3e6d9c4 commit 3432927

9 files changed

+166
-6
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
---
# Single-replica SGLang Deployment that serves Qwen3-32B from a host-mounted
# model directory on port 8000.
#
# The Deployment name, the model.aibrix.ai/name label (metadata, selector and
# pod template) and --served-model-name are all kept identical: qwen3-32b.
# The metadata previously said qwen3-8b while the server advertised
# qwen3-32b, so the label did not describe the model actually being served
# and the object name clashed with the separate Qwen3-8B base Deployment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: qwen3-32b
  namespace: default
  labels:
    model.aibrix.ai/name: qwen3-32b
    # Label values must be strings, hence the quotes; same value as --port.
    model.aibrix.ai/port: "8000"
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: qwen3-32b
  template:
    metadata:
      labels:
        model.aibrix.ai/name: qwen3-32b
    spec:
      containers:
        - name: sglang-server
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
          # Run through a shell so the multi-line launch command below is
          # passed as a single script argument.
          command: ["sh", "-c"]
          args:
            - |
              python3 -m sglang.launch_server \
                --model-path /models/Qwen3-32B \
                --served-model-name qwen3-32b \
                --host 0.0.0.0 \
                --port 8000 \
                --trust-remote-code \
                --enable-metrics \
                --mem-fraction-static 0.8 \
                --log-level debug
          resources:
            limits:
              # NOTE(review): one GPU and no tensor-parallel flag for a 32B
              # model; confirm this fits on the target GPU type.
              nvidia.com/gpu: 1
          volumeMounts:
            # Model weights; the launch command reads /models/Qwen3-32B.
            - name: model-vol
              mountPath: /models
              readOnly: true
            # /dev/shm is backed by the in-memory emptyDir defined below.
            - name: shared-mem
              mountPath: /dev/shm
          securityContext:
            capabilities:
              add:
                # NOTE(review): presumably needed for pinned/locked memory
                # used by the transfer backend; confirm.
                - IPC_LOCK
      volumes:
        # The host directory must already exist (type: Directory).
        - name: model-vol
          hostPath:
            path: /data01/models
            type: Directory
        - name: shared-mem
          emptyDir:
            medium: Memory
55+
File renamed without changes.
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
---
# Single-replica SGLang Deployment that serves Qwen3-8B from a host-mounted
# model directory on port 8000. The model.aibrix.ai/name label value equals
# the --served-model-name passed to the server (qwen3-8b).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: qwen3-8b
  namespace: default
  labels:
    model.aibrix.ai/name: qwen3-8b
    # Quoted so the label value stays a string; same value as --port.
    model.aibrix.ai/port: "8000"
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: qwen3-8b
  template:
    metadata:
      labels:
        model.aibrix.ai/name: qwen3-8b
    spec:
      volumes:
        # The host directory must already exist (type: Directory).
        - name: model-vol
          hostPath:
            type: Directory
            path: /data01/models
        - name: shared-mem
          emptyDir:
            medium: Memory
      containers:
        - name: sglang-server
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
          # Run through a shell so the multi-line launch command is passed
          # as a single script argument.
          command:
            - sh
            - -c
          args:
            - |
              python3 -m sglang.launch_server \
                --model-path /models/Qwen3-8B \
                --served-model-name qwen3-8b \
                --host 0.0.0.0 \
                --port 8000 \
                --trust-remote-code \
                --enable-metrics \
                --mem-fraction-static 0.8 \
                --log-level debug
          resources:
            limits:
              nvidia.com/gpu: 1
          securityContext:
            capabilities:
              add:
                - IPC_LOCK
          volumeMounts:
            # Model weights; the launch command reads /models/Qwen3-8B.
            - mountPath: /models
              name: model-vol
              readOnly: true
            # /dev/shm is backed by the in-memory emptyDir defined above.
            - mountPath: /dev/shm
              name: shared-mem
File renamed without changes.
File renamed without changes.

test/regression/v0.4.0/vllm/aibrix-router-1p1d.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ spec:
3333
]
3434
labels:
3535
model.aibrix.ai/name: qwen3-8b
36-
model.aibrix.ai/port: "30000"
36+
model.aibrix.ai/port: "8000"
3737
model.aibrix.ai/engine: vllm
3838
spec:
3939
containers:
@@ -108,7 +108,7 @@ spec:
108108
]
109109
labels:
110110
model.aibrix.ai/name: qwen3-8b
111-
model.aibrix.ai/port: "30000"
111+
model.aibrix.ai/port: "8000"
112112
model.aibrix.ai/engine: vllm
113113
spec:
114114
containers:

test/regression/v0.4.0/vllm/aibrix-router-2p2d-tp2.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ spec:
3838
]
3939
labels:
4040
model.aibrix.ai/name: qwen3-32b
41-
model.aibrix.ai/port: "30000"
41+
model.aibrix.ai/port: "8000"
4242
model.aibrix.ai/engine: vllm
4343
spec:
4444
containers:
@@ -119,7 +119,7 @@ spec:
119119
]
120120
labels:
121121
model.aibrix.ai/name: qwen3-32b
122-
model.aibrix.ai/port: "30000"
122+
model.aibrix.ai/port: "8000"
123123
model.aibrix.ai/engine: vllm
124124
spec:
125125
containers:

test/regression/v0.4.0/vllm/aibrix-router-4p3d.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ spec:
3333
]
3434
labels:
3535
model.aibrix.ai/name: qwen3-8b
36-
model.aibrix.ai/port: "30000"
36+
model.aibrix.ai/port: "8000"
3737
model.aibrix.ai/engine: vllm
3838
spec:
3939
containers:
@@ -108,7 +108,7 @@ spec:
108108
]
109109
labels:
110110
model.aibrix.ai/name: qwen3-8b
111-
model.aibrix.ai/port: "30000"
111+
model.aibrix.ai/port: "8000"
112112
model.aibrix.ai/engine: vllm
113113
spec:
114114
containers:
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
---
# Single-replica vLLM OpenAI-compatible server Deployment that serves
# Qwen3-8B from a host-mounted model directory on port 8000. The
# model.aibrix.ai/name label value equals the --served-model-name passed to
# the server (qwen3-8b).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: qwen3-8b
  namespace: default
  labels:
    model.aibrix.ai/name: qwen3-8b
    # Quoted so the label value stays a string; same value as --port.
    model.aibrix.ai/port: "8000"
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: qwen3-8b
  template:
    metadata:
      labels:
        model.aibrix.ai/name: qwen3-8b
    spec:
      volumes:
        # The host directory must already exist (type: Directory).
        - name: model-vol
          hostPath:
            type: Directory
            path: /data01/models
        - name: shared-mem
          emptyDir:
            medium: Memory
      containers:
        - name: vllm-openai
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
          # Run through a shell so the multi-line launch command is passed
          # as a single script argument.
          command:
            - sh
            - -c
          args:
            - |
              python3 -m vllm.entrypoints.openai.api_server \
                --host "0.0.0.0" \
                --port "8000" \
                --uvicorn-log-level warning \
                --model /models/Qwen3-8B \
                --served-model-name qwen3-8b
          resources:
            limits:
              nvidia.com/gpu: 1
          securityContext:
            capabilities:
              add:
                - IPC_LOCK
          volumeMounts:
            # Model weights; the launch command reads /models/Qwen3-8B.
            - mountPath: /models
              name: model-vol
              readOnly: true
            # /dev/shm is backed by the in-memory emptyDir defined above.
            - mountPath: /dev/shm
              name: shared-mem

0 commit comments

Comments
 (0)