Skip to content

Commit d242f78

Browse files
committed
moves to 235B-A22B-FP8 non-instruct
Signed-off-by: Elnifio <[email protected]>
1 parent 9dfa886 commit d242f78

File tree

4 files changed

+7
-7
lines changed

4 files changed

+7
-7
lines changed

recipes/qwen3-235b-a22b-instruct-2507-fp8/model-cache/model-download.yaml renamed to recipes/qwen3-235b-a22b-fp8/model-cache/model-download.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,13 @@ spec:
2323
name: hf-token-secret
2424
env:
2525
- name: MODEL_NAME
26-
value: Qwen/Qwen3-235B-A22B-Instruct-2507-FP8
26+
value: Qwen/Qwen3-235B-A22B-FP8
2727
- name: HF_HOME
2828
value: /model-store
2929
- name: HF_HUB_ENABLE_HF_TRANSFER
3030
value: "1"
3131
- name: MODEL_REVISION
32-
value: e156cb4efae43fbee1a1ab073f946a1377e6b969
32+
value: 39eb2b067ea6b8e3e1dd97d3cd0c7ffeaf3e1a35
3333
args:
3434
- |
3535
set -eux

recipes/qwen3-235b-a22b-instruct-2507-fp8/trtllm/disagg/deploy.yaml renamed to recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ spec:
114114
mainContainer:
115115
env:
116116
- name: MODEL_PATH
117-
value: /mnt/model-cache/hub/models--Qwen--Qwen3-235B-A22B-Instruct-2507-FP8/snapshots/e156cb4efae43fbee1a1ab073f946a1377e6b969
117+
value: /mnt/model-cache/hub/models--Qwen--Qwen3-235B-A22B-FP8/snapshots/39eb2b067ea6b8e3e1dd97d3cd0c7ffeaf3e1a35
118118
- name: ENGINE_ARGS
119119
value: /engine_configs/prefill.yaml
120120
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
@@ -126,7 +126,7 @@ spec:
126126
- |
127127
python3 -m dynamo.trtllm \
128128
--model-path "${MODEL_PATH}" \
129-
--served-model-name "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8" \
129+
--served-model-name "Qwen/Qwen3-235B-A22B-FP8" \
130130
--extra-engine-args "${ENGINE_ARGS}" \
131131
--disaggregation-mode prefill \
132132
--disaggregation-strategy prefill_first
@@ -166,7 +166,7 @@ spec:
166166
mainContainer:
167167
env:
168168
- name: MODEL_PATH
169-
value: /mnt/model-cache/hub/models--Qwen--Qwen3-235B-A22B-Instruct-2507-FP8/snapshots/e156cb4efae43fbee1a1ab073f946a1377e6b969
169+
value: /mnt/model-cache/hub/models--Qwen--Qwen3-235B-A22B-FP8/snapshots/39eb2b067ea6b8e3e1dd97d3cd0c7ffeaf3e1a35
170170
- name: ENGINE_ARGS
171171
value: /engine_configs/decode.yaml
172172
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
@@ -178,7 +178,7 @@ spec:
178178
- |
179179
python3 -m dynamo.trtllm \
180180
--model-path "${MODEL_PATH}" \
181-
--served-model-name "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8" \
181+
--served-model-name "Qwen/Qwen3-235B-A22B-FP8" \
182182
--extra-engine-args "${ENGINE_ARGS}" \
183183
--disaggregation-mode decode \
184184
--disaggregation-strategy prefill_first

recipes/qwen3-235b-a22b-instruct-2507-fp8/trtllm/disagg/perf.yaml renamed to recipes/qwen3-235b-a22b-fp8/trtllm/disagg/perf.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ spec:
109109
echo "done with concurrency $TOTAL_CONCURRENCY"
110110
env:
111111
- name: TARGET_MODEL
112-
value: Qwen/Qwen3-235B-A22B-Instruct-2507-FP8
112+
value: Qwen/Qwen3-235B-A22B-FP8
113113
- name: ENDPOINT
114114
value: qwen3-235b-a22b-disagg-frontend:8000
115115
- name: CONCURRENCY_PER_GPU

0 commit comments

Comments
 (0)