
Commit b78772c

[Frontend] supports deepseekv32 chat template (#29837)
Signed-off-by: chaunceyjiang <[email protected]>
1 parent f5d3d93

File tree: 5 files changed, +616 −2 lines

The commit adds a dedicated DeepSeek-V3.2 tokenizer mode and tokenizer class and wires them into OpenAI-compatible chat preprocessing; three of the five changed files are shown below.

vllm/config/model.py
Lines changed: 2 additions & 1 deletion

@@ -84,7 +84,7 @@
     "transcription",
     "draft",
 ]
-TokenizerMode = Literal["auto", "hf", "slow", "mistral"]
+TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
 ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
 LogprobsMode = Literal[
     "raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"
@@ -141,6 +141,7 @@ class ModelConfig:
     - "hf" will use the fast tokenizer if available.\n
     - "slow" will always use the slow tokenizer.\n
     - "mistral" will always use the tokenizer from `mistral_common`.\n
+    - "deepseek_v32" will always use the tokenizer from `deepseek_v32`.\n
     - Other custom values can be supported via plugins."""
     trust_remote_code: bool = False
     """Trust remote code (e.g., from HuggingFace) when downloading the model

vllm/entrypoints/openai/serving_engine.py
Lines changed: 8 additions & 1 deletion

@@ -105,7 +105,7 @@
 from vllm.pooling_params import PoolingParams
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import BeamSearchParams, SamplingParams
-from vllm.tokenizers import MistralTokenizer, TokenizerLike
+from vllm.tokenizers import DeepseekV32Tokenizer, MistralTokenizer, TokenizerLike
 from vllm.tracing import (
     contains_trace_headers,
     extract_trace_headers,
@@ -1128,6 +1128,13 @@ async def _preprocess_chat(
                 messages=messages,
                 **_chat_template_kwargs,
             )
+        elif isinstance(tokenizer, DeepseekV32Tokenizer):
+            request_prompt = tokenizer.apply_chat_template(
+                conversation=conversation,
+                messages=messages,
+                model_config=model_config,
+                **_chat_template_kwargs,
+            )
         else:
             request_prompt = apply_hf_chat_template(
                 tokenizer=tokenizer,
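The new branch mirrors the existing `MistralTokenizer` special case: the tokenizer's own `apply_chat_template` replaces `apply_hf_chat_template`, and additionally receives `model_config`. Only the call-site keywords are visible in this diff, so the following stand-in is a hypothetical sketch of the implied interface, not the actual implementation in `vllm/tokenizers/deepseekv32.py`:

```python
# Hypothetical stand-in for the interface _preprocess_chat now calls.
# Keyword names (conversation, messages, model_config, plus pass-through
# chat-template kwargs) come from the diff; the body is illustrative only.
from typing import Any


class DeepseekV32TokenizerSketch:
    def apply_chat_template(
        self,
        conversation: list[dict[str, Any]],
        messages: list[dict[str, Any]],
        model_config: Any,
        **chat_template_kwargs: Any,
    ) -> str:
        # Render each message as "role: content" -- purely illustrative;
        # the real DeepSeek-V3.2 chat template is model-specific.
        return "\n".join(f"{m['role']}: {m['content']}" for m in messages)


# Example call shaped like the one in serving_engine.py:
prompt = DeepseekV32TokenizerSketch().apply_chat_template(
    conversation=[],
    messages=[{"role": "user", "content": "Hello"}],
    model_config=None,
)
```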

vllm/tokenizers/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from .deepseekv32 import DeepseekV32Tokenizer
 from .hf import HfTokenizer
 from .mistral import MistralTokenizer
 from .protocol import TokenizerLike
@@ -21,4 +22,5 @@
     "get_tokenizer",
     "cached_tokenizer_from_config",
     "init_tokenizer_from_config",
+    "DeepseekV32Tokenizer",
 ]
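With the re-export in place, the class resolves from the package root, which is what the `serving_engine.py` import above relies on. A quick check (the diff shows only the export, so constructor arguments are omitted):

```python
# Verify the public import path added by this diff; only the import is
# grounded here -- how the class is constructed is not shown in the commit.
from vllm.tokenizers import DeepseekV32Tokenizer

print(DeepseekV32Tokenizer.__module__)  # expected: vllm.tokenizers.deepseekv32
```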
