10 changes: 5 additions & 5 deletions tests/entrypoints/openai/test_tokenization.py
@@ -53,7 +53,7 @@ async def test_tokenize_completions(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

     for add_special in [False, True]:
         prompt = "vllm1 This is a test prompt."
@@ -87,7 +87,7 @@ async def test_tokenize_chat(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

     for add_generation in [False, True]:
         for add_special in [False, True]:
@@ -140,7 +140,7 @@ async def test_tokenize_chat_with_tools(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

     for add_generation in [False, True]:
         for add_special in [False, True]:
@@ -210,7 +210,7 @@ async def test_tokenize_with_return_token_strs(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

     prompt = "This is a token_strs test prompt! vllm1"
     response = requests.post(
@@ -240,7 +240,7 @@ async def test_detokenize(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

     prompt = "This is a test prompt. vllm1"
     tokens = tokenizer.encode(prompt, add_special_tokens=False)
2 changes: 1 addition & 1 deletion tests/entrypoints/pooling/embed/test_online.py
@@ -199,7 +199,7 @@ async def test_conversation_embedding(
     chat_response.raise_for_status()
     chat_embeddings = EmbeddingResponse.model_validate(chat_response.json())

-    tokenizer = get_tokenizer(tokenizer_name=model_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=model_name)
     prompt = tokenizer.apply_chat_template(
         messages,
         chat_template=DUMMY_CHAT_TEMPLATE,
6 changes: 1 addition & 5 deletions tests/entrypoints/pooling/pooling/test_online.py
@@ -158,11 +158,7 @@ async def test_conversation_pooling(server: RemoteOpenAIServer, model_name: str)
     chat_response.raise_for_status()
     chat_poolings = PoolingResponse.model_validate(chat_response.json())

-    tokenizer = get_tokenizer(
-        tokenizer_name=model_name,
-        tokenizer_mode="fast",
-        trust_remote_code=True,
-    )
+    tokenizer = get_tokenizer(tokenizer_name=model_name, trust_remote_code=True)
     prompt = tokenizer.apply_chat_template(
         messages,
         chat_template=DUMMY_CHAT_TEMPLATE,
2 changes: 1 addition & 1 deletion tests/models/registry.py
@@ -23,7 +23,7 @@ class _HfExamplesInfo:
     tokenizer: str | None = None
     """Set the tokenizer to load for this architecture."""

-    tokenizer_mode: TokenizerMode = "auto"
+    tokenizer_mode: TokenizerMode | str = "auto"
     """Set the tokenizer type for this architecture."""

     speculative_model: str | None = None
25 changes: 21 additions & 4 deletions tests/tokenizers_/test_registry.py
@@ -1,13 +1,28 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from pathlib import Path
+
 from vllm.tokenizers import TokenizerLike, TokenizerRegistry
 from vllm.transformers_utils.tokenizer import get_tokenizer


 class TestTokenizer(TokenizerLike):
     @classmethod
-    def from_pretrained(cls, *args, **kwargs) -> "TestTokenizer":
-        return TestTokenizer()  # type: ignore
+    def from_pretrained(
+        cls,
+        path_or_repo_id: str | Path,
+        *args,
+        trust_remote_code: bool = False,
+        revision: str | None = None,
+        download_dir: str | None = None,
+        **kwargs,
+    ) -> "TestTokenizer":
+        return TestTokenizer(path_or_repo_id)  # type: ignore
+
+    def __init__(self, path_or_repo_id: str | Path) -> None:
+        super().__init__()
+
+        self.path_or_repo_id = path_or_repo_id

     @property
     def bos_token_id(self) -> int:
@@ -29,14 +44,16 @@ def is_fast(self) -> bool:
 def test_customized_tokenizer():
     TokenizerRegistry.register("test_tokenizer", __name__, TestTokenizer.__name__)

-    tokenizer = TokenizerRegistry.get_tokenizer("test_tokenizer")
+    tokenizer = TokenizerRegistry.get_tokenizer("test_tokenizer", "abc")
     assert isinstance(tokenizer, TestTokenizer)
+    assert tokenizer.path_or_repo_id == "abc"
     assert tokenizer.bos_token_id == 0
     assert tokenizer.eos_token_id == 1
     assert tokenizer.pad_token_id == 2

-    tokenizer = get_tokenizer("test_tokenizer", tokenizer_mode="custom")
+    tokenizer = get_tokenizer("abc", tokenizer_mode="test_tokenizer")
     assert isinstance(tokenizer, TestTokenizer)
+    assert tokenizer.path_or_repo_id == "abc"
     assert tokenizer.bos_token_id == 0
     assert tokenizer.eos_token_id == 1
     assert tokenizer.pad_token_id == 2
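
Note on usage: taken together, the changes above sketch the new plugin flow. `from_pretrained` now receives the path or repo ID explicitly, and a custom tokenizer is selected by passing its registered name as `tokenizer_mode`. A minimal out-of-tree registration might look like the following sketch; the class name is hypothetical, and a real tokenizer would also need to implement the rest of the `TokenizerLike` protocol, which is elided here.

    from pathlib import Path

    from vllm.tokenizers import TokenizerLike, TokenizerRegistry
    from vllm.transformers_utils.tokenizer import get_tokenizer


    class AcmeTokenizer(TokenizerLike):
        def __init__(self, path_or_repo_id: str | Path) -> None:
            super().__init__()
            self.path_or_repo_id = path_or_repo_id

        @classmethod
        def from_pretrained(
            cls,
            path_or_repo_id: str | Path,
            *args,
            trust_remote_code: bool = False,
            revision: str | None = None,
            download_dir: str | None = None,
            **kwargs,
        ) -> "AcmeTokenizer":
            return cls(path_or_repo_id)

        # ... bos/eos/pad token IDs and the other TokenizerLike members ...


    # Lazy registration by module path and class name, as in the test above:
    TokenizerRegistry.register("acme", __name__, AcmeTokenizer.__name__)

    # Lookup resolves the mode name and forwards the path to from_pretrained:
    tokenizer = get_tokenizer("some/repo-id", tokenizer_mode="acme")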
9 changes: 2 additions & 7 deletions tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -4,7 +4,7 @@

 import json
 from enum import Enum
-from typing import TYPE_CHECKING, Any
+from typing import Any

 import jsonschema
 import pytest
@@ -24,11 +24,6 @@
     StructuredOutputsParams,
 )

-if TYPE_CHECKING:
-    from vllm.config.model import TokenizerMode
-else:
-    TokenizerMode = str
-
 NGRAM_SPEC_CONFIG = {
     "model": "[ngram]",
     "num_speculative_tokens": 5,
@@ -627,7 +622,7 @@ def test_structured_output(
 )
 def test_structured_output_with_reasoning_matrices(
     backend: str,
-    tokenizer_mode: TokenizerMode,
+    tokenizer_mode: str,
     reasoning_parser: str,
     model_name: str,
     speculative_config: dict[str, Any] | None,
22 changes: 7 additions & 15 deletions vllm/config/model.py
@@ -86,7 +86,7 @@
     "transcription",
     "draft",
 ]
-TokenizerMode = Literal["auto", "hf", "slow", "mistral", "custom"]
+TokenizerMode = Literal["auto", "hf", "slow", "mistral"]
 ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
 LogprobsMode = Literal[
     "raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"
@@ -137,13 +137,13 @@ class ModelConfig:
     tokenizer: SkipValidation[str] = None  # type: ignore
     """Name or path of the Hugging Face tokenizer to use. If unspecified, model
     name or path will be used."""
-    tokenizer_mode: TokenizerMode = "auto"
+    tokenizer_mode: TokenizerMode | str = "auto"
     """Tokenizer mode:\n
     - "auto" will use "hf" tokenizer if Mistral's tokenizer is not available.\n
     - "hf" will use the fast tokenizer if available.\n
     - "slow" will always use the slow tokenizer.\n
     - "mistral" will always use the tokenizer from `mistral_common`.\n
-    - "custom" will use --tokenizer to select the preregistered tokenizer."""
+    - Other custom values can be supported via plugins."""
     trust_remote_code: bool = False
     """Trust remote code (e.g., from HuggingFace) when downloading the model
     and tokenizer."""
@@ -708,16 +708,17 @@ def _task_to_convert(task: TaskOption) -> ConvertType:
             # can be correctly capped to sliding window size
             self.hf_text_config.sliding_window = None

-        if not self.skip_tokenizer_init:
-            self._verify_tokenizer_mode()
-
         # Avoid running try_verify_and_update_config multiple times
         self.config_updated = False

         self._verify_quantization()
         self._verify_cuda_graph()
         self._verify_bnb_config()

+    @field_validator("tokenizer_mode", mode="after")
+    def _lowercase_tokenizer_mode(cls, tokenizer_mode: str) -> str:
+        return tokenizer_mode.lower()
+
     @field_validator("quantization", mode="before")
     @classmethod
     def validate_quantization_before(cls, value: Any) -> Any:
@@ -829,15 +830,6 @@ def _get_encoder_config(self):
             model, _ = split_remote_gguf(model)
         return get_sentence_transformer_tokenizer_config(model, self.revision)

-    def _verify_tokenizer_mode(self) -> None:
-        tokenizer_mode = cast(TokenizerMode, self.tokenizer_mode.lower())
-        if tokenizer_mode not in get_args(TokenizerMode):
-            raise ValueError(
-                f"Unknown tokenizer mode: {self.tokenizer_mode}. Must be "
-                f"one of {get_args(TokenizerMode)}."
-            )
-        self.tokenizer_mode = tokenizer_mode
-
     def _get_default_runner_type(
         self,
         architectures: list[str],
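
Note: with `_verify_tokenizer_mode` removed, `ModelConfig` no longer rejects unknown modes up front; the new validator only lowercases the value, and resolution happens later against the tokenizer registry. A self-contained sketch of the validator pattern (the `Cfg` model here is illustrative, not vLLM code):

    from pydantic import BaseModel, field_validator


    class Cfg(BaseModel):
        # Plain str instead of a closed Literal, so plugin-registered
        # modes pass validation.
        tokenizer_mode: str = "auto"

        @field_validator("tokenizer_mode", mode="after")
        @classmethod
        def _lowercase_tokenizer_mode(cls, value: str) -> str:
            return value.lower()


    assert Cfg(tokenizer_mode="MISTRAL").tokenizer_mode == "mistral"
    assert Cfg(tokenizer_mode="acme").tokenizer_mode == "acme"  # not rejected up front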
2 changes: 1 addition & 1 deletion vllm/engine/arg_utils.py
@@ -360,7 +360,7 @@ class EngineArgs:
     task: TaskOption | None = ModelConfig.task
     skip_tokenizer_init: bool = ModelConfig.skip_tokenizer_init
     enable_prompt_embeds: bool = ModelConfig.enable_prompt_embeds
-    tokenizer_mode: TokenizerMode = ModelConfig.tokenizer_mode
+    tokenizer_mode: TokenizerMode | str = ModelConfig.tokenizer_mode
     trust_remote_code: bool = ModelConfig.trust_remote_code
     allowed_local_media_path: str = ModelConfig.allowed_local_media_path
     allowed_media_domains: list[str] | None = ModelConfig.allowed_media_domains
2 changes: 1 addition & 1 deletion vllm/entrypoints/llm.py
@@ -188,7 +188,7 @@ def __init__(
         runner: RunnerOption = "auto",
         convert: ConvertOption = "auto",
         tokenizer: str | None = None,
-        tokenizer_mode: TokenizerMode = "auto",
+        tokenizer_mode: TokenizerMode | str = "auto",
         skip_tokenizer_init: bool = False,
         trust_remote_code: bool = False,
         allowed_local_media_path: str = "",
10 changes: 8 additions & 2 deletions vllm/tokenizers/__init__.py
@@ -4,6 +4,12 @@
 from .hf import HfTokenizer
 from .mistral import MistralTokenizer
 from .protocol import TokenizerLike
-from .registry import TokenizerRegistry
+from .registry import TokenizerRegistry, get_tokenizer

-__all__ = ["TokenizerLike", "HfTokenizer", "MistralTokenizer", "TokenizerRegistry"]
+__all__ = [
+    "TokenizerLike",
+    "HfTokenizer",
+    "MistralTokenizer",
+    "TokenizerRegistry",
+    "get_tokenizer",
+]
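
Note: with this re-export, the registry-backed lookup is reachable from the package root. A one-line usage sketch, assuming the registry-level `get_tokenizer` accepts the same call shape as the helper used in the tests (the repo ID is a placeholder):

    from vllm.tokenizers import get_tokenizer

    tokenizer = get_tokenizer("some/repo-id")  # tokenizer_mode defaults to "auto"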
2 changes: 2 additions & 0 deletions vllm/tokenizers/hf.py
@@ -10,6 +10,7 @@
 from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config

 from .protocol import TokenizerLike
+from .registry import TokenizerRegistry

 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
@@ -67,6 +68,7 @@ def __reduce__(self):
     return cached_tokenizer  # type: ignore


+@TokenizerRegistry.register("hf")
 class HfTokenizer(TokenizerLike):
     @classmethod
     def from_pretrained(
2 changes: 2 additions & 0 deletions vllm/tokenizers/mistral.py
@@ -6,6 +6,7 @@
 from vllm.logger import init_logger

 from .protocol import TokenizerLike
+from .registry import TokenizerRegistry

 if TYPE_CHECKING:
     from mistral_common.protocol.instruct.request import (
@@ -165,6 +166,7 @@ def _tekken_token_to_id(tokenizer: "Tekkenizer", t: str | bytes) -> int:
     return tokenizer.unk_id


+@TokenizerRegistry.register("mistral")
 class MistralTokenizer(TokenizerLike):
     @classmethod
     def from_pretrained(
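
Note: these decorators make the built-in tokenizers self-register under their mode names at import time, which is what allows "custom" to be dropped from the `TokenizerMode` literal: "hf" and "mistral" now resolve through the same registry path as plugin tokenizers. Judging from the call sites in this PR, `TokenizerRegistry.register` supports both this single-argument decorator form and the lazy three-argument form used in the tests. A hypothetical out-of-tree use of the decorator form:

    from vllm.tokenizers import TokenizerLike, TokenizerRegistry


    @TokenizerRegistry.register("acme")
    class AcmeTokenizer(TokenizerLike):
        ...  # implement the TokenizerLike protocol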