Add base YAML manifests without PD (prefill/decode disaggregation)

nwangfw · nwangfw · commit 3432927a338f · 2025-07-31T18:13:28.000-07:00
Signed-off-by: Ning Wang <n.wang.chn@hotmail.com>
diff --git a/test/regression/v0.4.0/sglang/qwen-32b/sglang-base.yaml b/test/regression/v0.4.0/sglang/qwen-32b/sglang-base.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment  # one SGLang server pod serving Qwen3-32B from a single container
+metadata:
+  labels:
+    model.aibrix.ai/name: qwen3-32b  # kept equal to --served-model-name in the launch command
+    model.aibrix.ai/port: "8000"  # same port the server listens on (--port); quoted so it stays a string
+  name: qwen3-32b  # distinct from the qwen3-8b base Deployment so both can coexist in `default`
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      model.aibrix.ai/name: qwen3-32b  # must match the pod template label below
+  template:
+    metadata:
+      labels:
+        model.aibrix.ai/name: qwen3-32b
+    spec:
+      containers:
+        - name: sglang-server
+          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
+          command: ["sh", "-c"]  # the single args entry is run as one shell script
+          args:  # launches the SGLang HTTP server on 0.0.0.0:8000 with metrics and debug logging
+            - |
+              python3 -m sglang.launch_server \
+                --model-path /models/Qwen3-32B \
+                --served-model-name qwen3-32b \
+                --host 0.0.0.0 \
+                --port 8000 \
+                --trust-remote-code \
+                --enable-metrics \
+                --mem-fraction-static 0.8 \
+                --log-level debug
+          resources:
+            limits:
+              nvidia.com/gpu: 1  # NOTE(review): confirm one GPU has enough memory for Qwen3-32B at 0.8 static fraction
+          volumeMounts:
+            - name: model-vol
+              mountPath: /models  # --model-path above resolves inside this mount
+              readOnly: true
+            - mountPath: /dev/shm  # memory-backed emptyDir (see volumes) replaces the default /dev/shm
+              name: shared-mem
+          securityContext:
+            capabilities:
+              add:
+                - IPC_LOCK  # presumably needed for pinned/locked memory; confirm against the image's requirements
+      volumes:
+        - name: model-vol
+          hostPath:
+            path: /data01/models  # node-local model directory; the weights are not pulled by this manifest
+            type: Directory  # the path must already exist on the node as a directory
+        - emptyDir:
+            medium: Memory
+          name: shared-mem
+
diff --git a/test/regression/v0.4.0/sglang/qwen-32b/sglang-router-2p2d-tp2-cache.yaml b/test/regression/v0.4.0/sglang/qwen-32b/sglang-router-2p2d-tp2-cache.yaml
diff --git a/test/regression/v0.4.0/sglang/qwen-8b/sglang-base.yaml b/test/regression/v0.4.0/sglang/qwen-8b/sglang-base.yaml
@@ -0,0 +1,54 @@
+apiVersion: apps/v1
+kind: Deployment  # one SGLang server pod serving Qwen3-8B from a single container
+metadata:
+  labels:
+    model.aibrix.ai/name: qwen3-8b  # same value as --served-model-name in the launch command
+    model.aibrix.ai/port: "8000"  # same port the server listens on (--port); quoted so it stays a string
+  name: qwen3-8b
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      model.aibrix.ai/name: qwen3-8b  # must match the pod template label below
+  template:
+    metadata:
+      labels:
+        model.aibrix.ai/name: qwen3-8b
+    spec:
+      containers:
+        - name: sglang-server
+          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/sglang:v0.4.9.post3-cu126-nixl-v0.4.1
+          command: ["sh", "-c"]  # the single args entry is run as one shell script
+          args:  # launches the SGLang HTTP server on 0.0.0.0:8000 with metrics and debug logging
+            - |
+              python3 -m sglang.launch_server \
+                --model-path /models/Qwen3-8B \
+                --served-model-name qwen3-8b \
+                --host 0.0.0.0 \
+                --port 8000 \
+                --trust-remote-code \
+                --enable-metrics \
+                --mem-fraction-static 0.8 \
+                --log-level debug
+          resources:
+            limits:
+              nvidia.com/gpu: 1  # one GPU per pod
+          volumeMounts:
+            - name: model-vol
+              mountPath: /models  # --model-path above resolves inside this mount
+              readOnly: true
+            - mountPath: /dev/shm  # memory-backed emptyDir (see volumes) replaces the default /dev/shm
+              name: shared-mem
+          securityContext:
+            capabilities:
+              add:
+                - IPC_LOCK  # presumably needed for pinned/locked memory; confirm against the image's requirements
+      volumes:
+        - name: model-vol
+          hostPath:
+            path: /data01/models  # node-local model directory; the weights are not pulled by this manifest
+            type: Directory  # the path must already exist on the node as a directory
+        - emptyDir:
+            medium: Memory
+          name: shared-mem
diff --git a/test/regression/v0.4.0/sglang/qwen-8b/sglang-router-1p1d-cache.yaml b/test/regression/v0.4.0/sglang/qwen-8b/sglang-router-1p1d-cache.yaml
diff --git a/test/regression/v0.4.0/sglang/qwen-8b/sglang-router-4p3d-cache.yaml b/test/regression/v0.4.0/sglang/qwen-8b/sglang-router-4p3d-cache.yaml
diff --git a/test/regression/v0.4.0/vllm/aibrix-router-1p1d.yaml b/test/regression/v0.4.0/vllm/aibrix-router-1p1d.yaml
@@ -33,7 +33,7 @@ spec:
                   ]
               labels:
                 model.aibrix.ai/name: qwen3-8b
-                model.aibrix.ai/port: "30000"
+                model.aibrix.ai/port: "8000"
                 model.aibrix.ai/engine: vllm
             spec:
               containers:
@@ -108,7 +108,7 @@ spec:
                   ]
               labels:
                 model.aibrix.ai/name: qwen3-8b
-                model.aibrix.ai/port: "30000"
+                model.aibrix.ai/port: "8000"
                 model.aibrix.ai/engine: vllm
             spec:
               containers:
diff --git a/test/regression/v0.4.0/vllm/aibrix-router-2p2d-tp2.yaml b/test/regression/v0.4.0/vllm/aibrix-router-2p2d-tp2.yaml
@@ -38,7 +38,7 @@ spec:
                   ]
               labels:
                 model.aibrix.ai/name: qwen3-32b
-                model.aibrix.ai/port: "30000"
+                model.aibrix.ai/port: "8000"
                 model.aibrix.ai/engine: vllm
             spec:
               containers:
@@ -119,7 +119,7 @@ spec:
                   ]
               labels:
                 model.aibrix.ai/name: qwen3-32b
-                model.aibrix.ai/port: "30000"
+                model.aibrix.ai/port: "8000"
                 model.aibrix.ai/engine: vllm
             spec:
               containers:
diff --git a/test/regression/v0.4.0/vllm/aibrix-router-4p3d.yaml b/test/regression/v0.4.0/vllm/aibrix-router-4p3d.yaml
@@ -33,7 +33,7 @@ spec:
                   ]
               labels:
                 model.aibrix.ai/name: qwen3-8b
-                model.aibrix.ai/port: "30000"
+                model.aibrix.ai/port: "8000"
                 model.aibrix.ai/engine: vllm
             spec:
               containers:
@@ -108,7 +108,7 @@ spec:
                   ]
               labels:
                 model.aibrix.ai/name: qwen3-8b
-                model.aibrix.ai/port: "30000"
+                model.aibrix.ai/port: "8000"
                 model.aibrix.ai/engine: vllm
             spec:
               containers:
diff --git a/test/regression/v0.4.0/vllm/vllm-base.yaml b/test/regression/v0.4.0/vllm/vllm-base.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment  # one vLLM OpenAI-compatible server pod serving Qwen3-8B
+metadata:
+  labels:
+    model.aibrix.ai/name: qwen3-8b  # same value as --served-model-name in the launch command
+    model.aibrix.ai/port: "8000"  # same port the server listens on (--port); quoted so it stays a string
+  name: qwen3-8b
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      model.aibrix.ai/name: qwen3-8b  # must match the pod template label below
+  template:
+    metadata:
+      labels:
+        model.aibrix.ai/name: qwen3-8b
+    spec:
+      containers:
+        - name: vllm-openai
+          image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.9.2-cu128-nixl-v0.4.1-lmcache-0.3.1.post1
+          command: ["sh", "-c"]  # the single args entry is run as one shell script
+          args:  # launches the vLLM API server on 0.0.0.0:8000 with uvicorn logging at warning level
+            - |
+              python3 -m vllm.entrypoints.openai.api_server \
+              --host "0.0.0.0" \
+              --port "8000" \
+              --uvicorn-log-level warning \
+              --model /models/Qwen3-8B \
+              --served-model-name qwen3-8b
+          resources:
+            limits:
+              nvidia.com/gpu: 1  # one GPU per pod
+          volumeMounts:
+            - name: model-vol
+              mountPath: /models  # --model above resolves inside this mount
+              readOnly: true
+            - mountPath: /dev/shm  # memory-backed emptyDir (see volumes) replaces the default /dev/shm
+              name: shared-mem
+          securityContext:
+            capabilities:
+              add:
+                - IPC_LOCK  # presumably needed for pinned/locked memory; confirm against the image's requirements
+      volumes:
+        - name: model-vol
+          hostPath:
+            path: /data01/models  # node-local model directory; the weights are not pulled by this manifest
+            type: Directory  # the path must already exist on the node as a directory
+        - emptyDir:
+            medium: Memory
+          name: shared-mem