File tree Expand file tree Collapse file tree 6 files changed +160
-0
lines changed Expand file tree Collapse file tree 6 files changed +160
-0
lines changed Original file line number Diff line number Diff line change 1+ apiVersion : apps/v1
2+ kind : Deployment
3+ metadata :
4+ labels :
5+ model.aibrix.ai/name : qwen3-8b
6+ model.aibrix.ai/port : " 8000"
7+ name : qwen3-8b
8+ namespace : default
9+ spec :
10+ replicas : 1
11+ selector :
12+ matchLabels :
13+ model.aibrix.ai/name : qwen3-8b
14+ template :
15+ metadata :
16+ labels :
17+ model.aibrix.ai/name : qwen3-8b
18+ spec :
19+ containers :
20+ - name : sglang-server
21+ image : kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
22+ command : ["sh", "-c"]
23+ args :
24+ - |
25+ python3 -m sglang.launch_server \
26+ --model-path /models/Qwen3-32B \
27+ --served-model-name qwen3-32b \
28+ --host 0.0.0.0 \
29+ --port 8000 \
30+ --trust-remote-code \
31+ --enable-metrics \
32+ --mem-fraction-static 0.8 \
33+ --log-level debug
34+ resources :
35+ limits :
36+ nvidia.com/gpu : 1
37+ volumeMounts :
38+ - name : model-vol
39+ mountPath : /models
40+ readOnly : true
41+ - mountPath : /dev/shm
42+ name : shared-mem
43+ securityContext :
44+ capabilities :
45+ add :
46+ - IPC_LOCK
47+ volumes :
48+ - name : model-vol
49+ hostPath :
50+ path : /data01/models
51+ type : Directory
52+ - emptyDir :
53+ medium : Memory
54+ name : shared-mem
55+
File renamed without changes.
---
# SGLang inference server for Qwen3-8B: one replica, one NVIDIA GPU.
#
# The Deployment name, the model.aibrix.ai/name label (metadata, selector and
# pod template) and --served-model-name all use the same identifier, qwen3-8b.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    model.aibrix.ai/name: qwen3-8b
    # Quoted so the value stays a string (label values must be strings and may
    # not contain whitespace). Same value as --port below.
    model.aibrix.ai/port: "8000"
  name: qwen3-8b
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: qwen3-8b
  template:
    metadata:
      labels:
        model.aibrix.ai/name: qwen3-8b
    spec:
      containers:
        - name: sglang-server
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
          # The server is launched through a shell so the multi-line command
          # below can use backslash line continuations.
          command: ["sh", "-c"]
          args:
            - |
              python3 -m sglang.launch_server \
                --model-path /models/Qwen3-8B \
                --served-model-name qwen3-8b \
                --host 0.0.0.0 \
                --port 8000 \
                --trust-remote-code \
                --enable-metrics \
                --mem-fraction-static 0.8 \
                --log-level debug
          resources:
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            # Model weights are read from the node's filesystem, read-only.
            - name: model-vol
              mountPath: /models
              readOnly: true
            # Memory-backed volume mounted over /dev/shm.
            - mountPath: /dev/shm
              name: shared-mem
          securityContext:
            capabilities:
              add:
                # NOTE(review): presumably required for pinned/locked memory
                # used by the NIXL transport in this image - confirm.
                - IPC_LOCK
      volumes:
        - name: model-vol
          hostPath:
            path: /data01/models
            # "Directory" requires the path to already exist on the node.
            type: Directory
        - emptyDir:
            medium: Memory
          name: shared-mem
File renamed without changes.
File renamed without changes.
---
# vLLM OpenAI-compatible API server for Qwen3-8B: one replica, one NVIDIA GPU.
#
# The Deployment name, the model.aibrix.ai/name label (metadata, selector and
# pod template) and --served-model-name all use the same identifier, qwen3-8b.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    model.aibrix.ai/name: qwen3-8b
    # Quoted so the value stays a string (label values must be strings and may
    # not contain whitespace). Same value as --port below.
    model.aibrix.ai/port: "8000"
  name: qwen3-8b
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: qwen3-8b
  template:
    metadata:
      labels:
        model.aibrix.ai/name: qwen3-8b
    spec:
      containers:
        - name: vllm-openai
          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
          # The server is launched through a shell so the multi-line command
          # below can use backslash line continuations.
          command: ["sh", "-c"]
          args:
            - |
              python3 -m vllm.entrypoints.openai.api_server \
                --host "0.0.0.0" \
                --port "8000" \
                --uvicorn-log-level warning \
                --model /models/Qwen3-8B \
                --served-model-name qwen3-8b
          resources:
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            # Model weights are read from the node's filesystem, read-only.
            - name: model-vol
              mountPath: /models
              readOnly: true
            # Memory-backed volume mounted over /dev/shm.
            - mountPath: /dev/shm
              name: shared-mem
          securityContext:
            capabilities:
              add:
                # NOTE(review): presumably required for pinned/locked memory
                # used by the NIXL transport in this image - confirm.
                - IPC_LOCK
      volumes:
        - name: model-vol
          hostPath:
            path: /data01/models
            # "Directory" requires the path to already exist on the node.
            type: Directory
        - emptyDir:
            medium: Memory
          name: shared-mem
You can’t perform that action at this time.
0 commit comments