
Commit 2b37523 (parent: fc95521)

[Frontend] supports deepseekv32 chat template

Signed-off-by: chaunceyjiang <[email protected]>

File tree

4 files changed: +471 -3 lines

vllm/config/model.py

Lines changed: 2 additions & 2 deletions
@@ -86,7 +86,7 @@
     "transcription",
     "draft",
 ]
-TokenizerMode = Literal["auto", "hf", "slow", "mistral"]
+TokenizerMode = Literal["auto", "hf", "slow", "mistral", "custom"]
 ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
 LogprobsMode = Literal[
     "raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"
@@ -143,7 +143,7 @@ class ModelConfig:
     - "hf" will use the fast tokenizer if available.\n
     - "slow" will always use the slow tokenizer.\n
     - "mistral" will always use the tokenizer from `mistral_common`.\n
-    - Other custom values can be supported via plugins."""
+    - "custom" will use --tokenizer to select the preregistered tokenizer."""
     trust_remote_code: bool = False
     """Trust remote code (e.g., from HuggingFace) when downloading the model
     and tokenizer."""
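
For context, the updated docstring means that with tokenizer_mode="custom" the value of --tokenizer names a preregistered tokenizer (such as the one this commit adds) rather than a HuggingFace path. A minimal usage sketch under assumptions: the model name and the registered tokenizer name below are illustrative placeholders, and the registration itself is expected to happen in plugin/import code not shown in this commit.

# Sketch only: selecting a preregistered tokenizer via the new "custom" mode.
# "deepseek_v32" and the model name are placeholder assumptions; only the
# tokenizer_mode="custom" semantics come from the docstring change above.
from vllm import LLM

llm = LLM(
    model="deepseek-ai/DeepSeek-V3.2-Exp",
    tokenizer="deepseek_v32",
    tokenizer_mode="custom",
)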

vllm/entrypoints/openai/serving_engine.py

Lines changed: 7 additions & 1 deletion
@@ -106,7 +106,7 @@
 from vllm.pooling_params import PoolingParams
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import BeamSearchParams, SamplingParams
-from vllm.tokenizers import MistralTokenizer, TokenizerLike
+from vllm.tokenizers import DeepseekV32Tokenizer, MistralTokenizer, TokenizerLike
 from vllm.tracing import (
     contains_trace_headers,
     extract_trace_headers,
@@ -1129,6 +1129,12 @@ async def _preprocess_chat(
                 messages=messages,
                 **_chat_template_kwargs,
             )
+        elif isinstance(tokenizer, DeepseekV32Tokenizer):
+            request_prompt = tokenizer.apply_chat_template(
+                messages=messages,
+                model_config=model_config,
+                **_chat_template_kwargs,
+            )
         else:
             request_prompt = apply_hf_chat_template(
                 tokenizer=tokenizer,
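
The new branch gives DeepseekV32Tokenizer its own chat-template path: the tokenizer renders the prompt itself and, unlike the HF and Mistral paths, receives the model config directly. A hedged caller-side sketch follows; only apply_chat_template(messages=..., model_config=..., **kwargs) is attested by the diff, while the tokenizer's construction and the exact message schema are assumptions.

# Sketch under assumptions: the import matches vllm/tokenizers/__init__.py,
# but how the tokenizer instance is obtained is not shown in this commit.
from vllm.tokenizers import DeepseekV32Tokenizer

def render_prompt(tokenizer: DeepseekV32Tokenizer, model_config) -> str:
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    # The model config is passed through so the template can adapt to
    # model-level settings, mirroring the new elif branch above.
    return tokenizer.apply_chat_template(
        messages=messages,
        model_config=model_config,
    )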

vllm/tokenizers/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from .deepseekv32 import DeepseekV32Tokenizer
 from .hf import HfTokenizer
 from .mistral import MistralTokenizer
 from .protocol import TokenizerLike
@@ -12,4 +13,5 @@
     "MistralTokenizer",
     "TokenizerRegistry",
     "get_tokenizer",
+    "DeepseekV32Tokenizer",
 ]
