Skip to content

Commit a87001f

Browse files
committed
Fix CLI
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 1283df4 commit a87001f

File tree

5 files changed

+12
-22
lines changed

5 files changed

+12
-22
lines changed

tests/models/registry.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -23,7 +23,7 @@ class _HfExamplesInfo:
2323
tokenizer: str | None = None
2424
"""Set the tokenizer to load for this architecture."""
2525

26-
tokenizer_mode: TokenizerMode = "auto"
26+
tokenizer_mode: TokenizerMode | str = "auto"
2727
"""Set the tokenizer type for this architecture."""
2828

2929
speculative_model: str | None = None

tests/v1/entrypoints/llm/test_struct_output_generate.py

Lines changed: 2 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -4,7 +4,7 @@
44

55
import json
66
from enum import Enum
7-
from typing import TYPE_CHECKING, Any
7+
from typing import Any
88

99
import jsonschema
1010
import pytest
@@ -24,11 +24,6 @@
2424
StructuredOutputsParams,
2525
)
2626

27-
if TYPE_CHECKING:
28-
from vllm.config.model import TokenizerMode
29-
else:
30-
TokenizerMode = str
31-
3227
NGRAM_SPEC_CONFIG = {
3328
"model": "[ngram]",
3429
"num_speculative_tokens": 5,
@@ -627,7 +622,7 @@ def test_structured_output(
627622
)
628623
def test_structured_output_with_reasoning_matrices(
629624
backend: str,
630-
tokenizer_mode: TokenizerMode,
625+
tokenizer_mode: str,
631626
reasoning_parser: str,
632627
model_name: str,
633628
speculative_config: dict[str, Any] | None,

vllm/config/model.py

Lines changed: 7 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -86,7 +86,7 @@
8686
"transcription",
8787
"draft",
8888
]
89-
TokenizerMode = Literal["auto", "hf", "slow", "mistral", "custom"]
89+
TokenizerMode = Literal["auto", "hf", "slow", "mistral"]
9090
ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
9191
LogprobsMode = Literal[
9292
"raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"
@@ -137,13 +137,13 @@ class ModelConfig:
137137
tokenizer: SkipValidation[str] = None # type: ignore
138138
"""Name or path of the Hugging Face tokenizer to use. If unspecified, model
139139
name or path will be used."""
140-
tokenizer_mode: TokenizerMode = "auto"
140+
tokenizer_mode: TokenizerMode | str = "auto"
141141
"""Tokenizer mode:\n
142142
- "auto" will use "hf" tokenizer if Mistral's tokenizer is not available.\n
143143
- "hf" will use the fast tokenizer if available.\n
144144
- "slow" will always use the slow tokenizer.\n
145145
- "mistral" will always use the tokenizer from `mistral_common`.\n
146-
- "custom" will use --tokenizer to select the preregistered tokenizer."""
146+
- Other custom values can be supported via plugins."""
147147
trust_remote_code: bool = False
148148
"""Trust remote code (e.g., from HuggingFace) when downloading the model
149149
and tokenizer."""
@@ -718,6 +718,10 @@ def _task_to_convert(task: TaskOption) -> ConvertType:
718718
self._verify_cuda_graph()
719719
self._verify_bnb_config()
720720

721+
@field_validator("tokenizer_mode", mode="after")
722+
def _lowercase_tokenizer_mode(cls, tokenizer_mode: str) -> str:
723+
return tokenizer_mode.lower()
724+
721725
@field_validator("quantization", mode="before")
722726
@classmethod
723727
def validate_quantization_before(cls, value: Any) -> Any:
@@ -829,15 +833,6 @@ def _get_encoder_config(self):
829833
model, _ = split_remote_gguf(model)
830834
return get_sentence_transformer_tokenizer_config(model, self.revision)
831835

832-
def _verify_tokenizer_mode(self) -> None:
833-
tokenizer_mode = cast(TokenizerMode, self.tokenizer_mode.lower())
834-
if tokenizer_mode not in get_args(TokenizerMode):
835-
raise ValueError(
836-
f"Unknown tokenizer mode: {self.tokenizer_mode}. Must be "
837-
f"one of {get_args(TokenizerMode)}."
838-
)
839-
self.tokenizer_mode = tokenizer_mode
840-
841836
def _get_default_runner_type(
842837
self,
843838
architectures: list[str],

vllm/engine/arg_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -360,7 +360,7 @@ class EngineArgs:
360360
task: TaskOption | None = ModelConfig.task
361361
skip_tokenizer_init: bool = ModelConfig.skip_tokenizer_init
362362
enable_prompt_embeds: bool = ModelConfig.enable_prompt_embeds
363-
tokenizer_mode: TokenizerMode = ModelConfig.tokenizer_mode
363+
tokenizer_mode: TokenizerMode | str = ModelConfig.tokenizer_mode
364364
trust_remote_code: bool = ModelConfig.trust_remote_code
365365
allowed_local_media_path: str = ModelConfig.allowed_local_media_path
366366
allowed_media_domains: list[str] | None = ModelConfig.allowed_media_domains

vllm/entrypoints/llm.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -188,7 +188,7 @@ def __init__(
188188
runner: RunnerOption = "auto",
189189
convert: ConvertOption = "auto",
190190
tokenizer: str | None = None,
191-
tokenizer_mode: TokenizerMode = "auto",
191+
tokenizer_mode: TokenizerMode | str = "auto",
192192
skip_tokenizer_init: bool = False,
193193
trust_remote_code: bool = False,
194194
allowed_local_media_path: str = "",

0 commit comments

Comments
 (0)