diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py
index 90158a028b0b..8ef1fba8df3e 100644
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -22,8 +22,11 @@
 from vllm.multimodal.cache import MultiModalProcessorOnlyCache
 from vllm.multimodal.inputs import MultiModalInputs
 from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
-from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
-from vllm.transformers_utils.tokenizer import encode_tokens
+from vllm.tokenizers import (
+    MistralTokenizer,
+    TokenizerLike,
+    cached_tokenizer_from_config,
+)
 
 from ....multimodal.utils import random_audio, random_image, random_video
 from ...registry import (
@@ -151,7 +154,7 @@ def get_text_token_prompts(
     mm_data: MultiModalDataDict,
 ):
     dummy_inputs = processor.dummy_inputs
-    tokenizer = processor.info.get_tokenizer()
+    tokenizer: TokenizerLike = processor.info.get_tokenizer()
 
     model_config = processor.info.ctx.model_config
     model_type = model_config.hf_config.model_type
@@ -188,10 +191,9 @@ def get_text_token_prompts(
     assert isinstance(inputs.prompt, str)
 
     text_prompt = inputs.prompt
-    token_prompt = encode_tokens(
-        tokenizer,
+    token_prompt = tokenizer.encode(
         text_prompt,
-        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
+        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
     )
 
     return text_prompt, token_prompt
diff --git a/tests/models/multimodal/processing/test_llama4.py b/tests/models/multimodal/processing/test_llama4.py
index 4c0791ea3cec..b73246b68b36 100644
--- a/tests/models/multimodal/processing/test_llama4.py
+++ b/tests/models/multimodal/processing/test_llama4.py
@@ -5,7 +5,6 @@
 import pytest
 
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import encode_tokens
 
 from ....conftest import ImageTestAssets
 from ...utils import build_model_context
@@ -48,7 +47,7 @@ def test_processor_override(
         ]
     }
     if tokenized_prompt:
-        prompt = encode_tokens(tokenizer, prompt)
+        prompt = tokenizer.encode(prompt)
 
     processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
     mm_data = processed_inputs["mm_kwargs"].get_data()
diff --git a/vllm/entrypoints/openai/speech_to_text.py b/vllm/entrypoints/openai/speech_to_text.py
index b34446d3230b..cea9924ebbac 100644
--- a/vllm/entrypoints/openai/speech_to_text.py
+++ b/vllm/entrypoints/openai/speech_to_text.py
@@ -37,7 +37,7 @@
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.import_utils import PlaceholderModule
 
 try:
diff --git a/vllm/entrypoints/renderer.py b/vllm/entrypoints/renderer.py
index 10b90bbbb0f3..f31b309b8ca4 100644
--- a/vllm/entrypoints/renderer.py
+++ b/vllm/entrypoints/renderer.py
@@ -33,7 +33,7 @@ class RenderConfig:
     `0` yields an empty list (and skips embeds).
     `-1` maps to `model_config.max_model_len`."""
 
-    add_special_tokens: bool | None = True
+    add_special_tokens: bool = True
     """Whether to add model-specific special tokens during tokenization."""
 
     cache_salt: str | None = None
@@ -315,7 +315,7 @@ async def _create_prompt_from_text(
         text: str,
         max_length: int | None,
         truncate_prompt_tokens: int | None,
-        add_special_tokens: bool | None,
+        add_special_tokens: bool,
         cache_salt: str | None,
     ) -> EngineTokensPrompt:
         """Tokenize text input asynchronously."""
diff --git a/vllm/entrypoints/score_utils.py b/vllm/entrypoints/score_utils.py
index 602f59ac09f5..8819c85af9a2 100644
--- a/vllm/entrypoints/score_utils.py
+++ b/vllm/entrypoints/score_utils.py
@@ -19,7 +19,7 @@
 from vllm.model_executor.models.interfaces import supports_score_template
 from vllm.multimodal.inputs import MultiModalDataDict
 from vllm.outputs import PoolingRequestOutput
-from vllm.transformers_utils.tokenizer import TokenizerLike
+from vllm.tokenizers import TokenizerLike
 
 ScoreContentPartParam: TypeAlias = (
     ChatCompletionContentPartImageParam | ChatCompletionContentPartImageEmbedsParam
diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py
index 0f86a1775280..891a9ce08023 100644
--- a/vllm/model_executor/models/nano_nemotron_vl.py
+++ b/vllm/model_executor/models/nano_nemotron_vl.py
@@ -75,7 +75,6 @@
 from vllm.sequence import IntermediateTensors
 from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .utils import _merge_multimodal_embeddings
@@ -454,14 +453,12 @@ def __init__(
 
         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )
 
     @property
@@ -1179,14 +1176,12 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
         tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
-        )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )
 
     def pixel_shuffle(self, x, scale_factor=0.5):
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index 7506ee8656fd..1ce0fb4e4d93 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -88,7 +88,6 @@
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .interfaces import (
@@ -591,7 +590,7 @@ def _apply_hf_processor_main(
                 tokenization_kwargs=tokenization_kwargs,
             )
             tokenizer = self.info.get_tokenizer()
-            prompt_ids = encode_tokens(tokenizer, prompt)
+            prompt_ids = tokenizer.encode(prompt)
         else:
             prompt_ids = self._apply_hf_processor_tokens_only(prompt)
 
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index 2f651bd71706..f241e79cfa7c 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -25,7 +25,6 @@
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.transformers_utils.processor import cached_processor_from_config
-from vllm.transformers_utils.tokenizer import decode_tokens, encode_tokens
 from vllm.utils.collection_utils import flatten_2d_lists, full_groupby
 from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
 from vllm.utils.jsontree import JSONTree, json_map_leaves
@@ -80,9 +79,9 @@ def _cached_encode(
     tokenizer: TokenizerLike,
     text: str,
     *,
-    add_special_tokens: bool | None = None,
+    add_special_tokens: bool = True,
 ) -> list[int]:
-    return encode_tokens(tokenizer, text, add_special_tokens=add_special_tokens)
+    return tokenizer.encode(text, add_special_tokens=add_special_tokens)
 
 
 @lru_cache(maxsize=2048)
@@ -90,11 +89,9 @@ def _cached_decode(
     tokenizer: TokenizerLike,
     token_ids: tuple[int, ...],
     *,
-    skip_special_tokens: bool | None = None,
+    skip_special_tokens: bool = False,
 ) -> str:
-    return decode_tokens(
-        tokenizer, list(token_ids), skip_special_tokens=skip_special_tokens
-    )
+    return tokenizer.decode(list(token_ids), skip_special_tokens=skip_special_tokens)
 
 
 def _seq2text(
@@ -110,7 +107,7 @@ def _seq2text(
         raise ValueError("You cannot decode tokens when `skip_tokenizer_init=True`")
 
     if not use_cache:
-        return decode_tokens(tokenizer, seq)
+        return tokenizer.decode(seq)
 
     return _cached_decode(tokenizer, tuple(seq))
 
@@ -126,7 +123,7 @@ def _seq2tokens(
         raise ValueError("You cannot encode text when `skip_tokenizer_init=True`")
 
     if not use_cache:
-        return encode_tokens(tokenizer, seq, add_special_tokens=False)
+        return tokenizer.encode(seq, add_special_tokens=False)
 
     return _cached_encode(tokenizer, seq, add_special_tokens=False)
 
@@ -2198,8 +2195,8 @@ def _get_enc_dec_inputs(
         tokenizer = self.info.get_tokenizer()
         decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data)
         if isinstance(decoder_prompt_raw, str):
-            decoder_prompt_ids = encode_tokens(
-                tokenizer, decoder_prompt_raw, add_special_tokens=False
+            decoder_prompt_ids = tokenizer.encode(
+                decoder_prompt_raw, add_special_tokens=False
             )
         else:
             decoder_prompt_ids = decoder_prompt_raw
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 617d16779ca2..32999903b348 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -4,6 +4,8 @@
 import warnings
 from typing import Any
 
+from typing_extensions import deprecated
+
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 
@@ -73,6 +75,7 @@ def __getattr__(name: str):
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
+@deprecated("Will be removed in v0.13. Please use `tokenizer.decode()` instead.")
 def decode_tokens(
     tokenizer: TokenizerLike,
     token_ids: list[int],
@@ -94,6 +97,7 @@ def decode_tokens(
     return tokenizer.decode(token_ids, **kw_args)
 
 
+@deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
 def encode_tokens(
     tokenizer: TokenizerLike,
     text: str,
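
Reviewer note: a minimal sketch of the call-site pattern this diff moves to. The `roundtrip` helper below is illustrative only (not part of the change); the keyword arguments mirror those already used at the touched call sites.

```python
from vllm.tokenizers import TokenizerLike

# Deprecated pattern (module-level helpers, scheduled for removal in v0.13):
#   from vllm.transformers_utils.tokenizer import encode_tokens, decode_tokens
#   token_ids = encode_tokens(tokenizer, text, add_special_tokens=False)
#   text = decode_tokens(tokenizer, token_ids, skip_special_tokens=False)


def roundtrip(tokenizer: TokenizerLike, text: str) -> str:
    # New pattern: call the methods directly on the tokenizer object.
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    return tokenizer.decode(token_ids, skip_special_tokens=False)
```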