diff --git a/tests/entrypoints/openai/test_tokenization.py b/tests/entrypoints/openai/test_tokenization.py
index 7fd32e1c7be1..751f94319eb9 100644
--- a/tests/entrypoints/openai/test_tokenization.py
+++ b/tests/entrypoints/openai/test_tokenization.py
@@ -53,7 +53,7 @@ async def test_tokenize_completions(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)
 
     for add_special in [False, True]:
         prompt = "vllm1 This is a test prompt."
@@ -87,7 +87,7 @@ async def test_tokenize_chat(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
@@ -140,7 +140,7 @@ async def test_tokenize_chat_with_tools(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
@@ -210,7 +210,7 @@ async def test_tokenize_with_return_token_strs(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)
 
     prompt = "This is a token_strs test prompt! vllm1"
     response = requests.post(
@@ -240,7 +240,7 @@ async def test_detokenize(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)
 
     prompt = "This is a test prompt. vllm1"
     tokens = tokenizer.encode(prompt, add_special_tokens=False)
diff --git a/tests/entrypoints/pooling/embed/test_online.py b/tests/entrypoints/pooling/embed/test_online.py
index 0c88d800e2f9..50d8ddffde0d 100644
--- a/tests/entrypoints/pooling/embed/test_online.py
+++ b/tests/entrypoints/pooling/embed/test_online.py
@@ -199,7 +199,7 @@ async def test_conversation_embedding(
     chat_response.raise_for_status()
     chat_embeddings = EmbeddingResponse.model_validate(chat_response.json())
 
-    tokenizer = get_tokenizer(tokenizer_name=model_name, tokenizer_mode="fast")
+    tokenizer = get_tokenizer(tokenizer_name=model_name)
     prompt = tokenizer.apply_chat_template(
         messages,
         chat_template=DUMMY_CHAT_TEMPLATE,
diff --git a/tests/entrypoints/pooling/pooling/test_online.py b/tests/entrypoints/pooling/pooling/test_online.py
index 4b20c5b0fa84..1577e786c3e9 100644
--- a/tests/entrypoints/pooling/pooling/test_online.py
+++ b/tests/entrypoints/pooling/pooling/test_online.py
@@ -158,11 +158,7 @@ async def test_conversation_pooling(server: RemoteOpenAIServer, model_name: str)
     chat_response.raise_for_status()
     chat_poolings = PoolingResponse.model_validate(chat_response.json())
 
-    tokenizer = get_tokenizer(
-        tokenizer_name=model_name,
-        tokenizer_mode="fast",
-        trust_remote_code=True,
-    )
+    tokenizer = get_tokenizer(tokenizer_name=model_name, trust_remote_code=True)
     prompt = tokenizer.apply_chat_template(
         messages,
         chat_template=DUMMY_CHAT_TEMPLATE,
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 1f4a106c06b4..d90f3a4d4f78 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -23,7 +23,7 @@ class _HfExamplesInfo:
     tokenizer: str | None = None
     """Set the tokenizer to load for this architecture."""
 
-    tokenizer_mode: TokenizerMode = "auto"
+    tokenizer_mode: TokenizerMode | str = "auto"
     """Set the tokenizer type for this architecture."""
 
     speculative_model: str | None = None
diff --git a/tests/tokenizers_/test_registry.py b/tests/tokenizers_/test_registry.py
index b357669f8378..57b6a14a54b3 100644
--- a/tests/tokenizers_/test_registry.py
+++ b/tests/tokenizers_/test_registry.py
@@ -1,13 +1,28 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from pathlib import Path
+
 from vllm.tokenizers import TokenizerLike, TokenizerRegistry
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 
 class TestTokenizer(TokenizerLike):
     @classmethod
-    def from_pretrained(cls, *args, **kwargs) -> "TestTokenizer":
-        return TestTokenizer()  # type: ignore
+    def from_pretrained(
+        cls,
+        path_or_repo_id: str | Path,
+        *args,
+        trust_remote_code: bool = False,
+        revision: str | None = None,
+        download_dir: str | None = None,
+        **kwargs,
+    ) -> "TestTokenizer":
+        return TestTokenizer(path_or_repo_id)  # type: ignore
+
+    def __init__(self, path_or_repo_id: str | Path) -> None:
+        super().__init__()
+
+        self.path_or_repo_id = path_or_repo_id
 
     @property
     def bos_token_id(self) -> int:
@@ -29,14 +44,16 @@ def is_fast(self) -> bool:
 
 def test_customized_tokenizer():
     TokenizerRegistry.register("test_tokenizer", __name__, TestTokenizer.__name__)
-    tokenizer = TokenizerRegistry.get_tokenizer("test_tokenizer")
+    tokenizer = TokenizerRegistry.get_tokenizer("test_tokenizer", "abc")
     assert isinstance(tokenizer, TestTokenizer)
+    assert tokenizer.path_or_repo_id == "abc"
     assert tokenizer.bos_token_id == 0
     assert tokenizer.eos_token_id == 1
     assert tokenizer.pad_token_id == 2
 
-    tokenizer = get_tokenizer("test_tokenizer", tokenizer_mode="custom")
+    tokenizer = get_tokenizer("abc", tokenizer_mode="test_tokenizer")
     assert isinstance(tokenizer, TestTokenizer)
+    assert tokenizer.path_or_repo_id == "abc"
     assert tokenizer.bos_token_id == 0
     assert tokenizer.eos_token_id == 1
     assert tokenizer.pad_token_id == 2
diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 85f108786c05..9cc5a6e073a6 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -4,7 +4,7 @@
 
 import json
 from enum import Enum
-from typing import TYPE_CHECKING, Any
+from typing import Any
 
 import jsonschema
 import pytest
@@ -24,11 +24,6 @@
     StructuredOutputsParams,
 )
 
-if TYPE_CHECKING:
-    from vllm.config.model import TokenizerMode
-else:
-    TokenizerMode = str
-
 NGRAM_SPEC_CONFIG = {
     "model": "[ngram]",
     "num_speculative_tokens": 5,
@@ -627,7 +622,7 @@ def test_structured_output(
 )
 def test_structured_output_with_reasoning_matrices(
     backend: str,
-    tokenizer_mode: TokenizerMode,
+    tokenizer_mode: str,
     reasoning_parser: str,
     model_name: str,
     speculative_config: dict[str, Any] | None,
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 92cd48402a65..6a052c16e6d6 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -86,7 +86,7 @@
     "transcription",
     "draft",
 ]
-TokenizerMode = Literal["auto", "hf", "slow", "mistral", "custom"]
+TokenizerMode = Literal["auto", "hf", "slow", "mistral"]
 ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
 LogprobsMode = Literal[
     "raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"
 ]
@@ -137,13 +137,13 @@ class ModelConfig:
     tokenizer: SkipValidation[str] = None  # type: ignore
     """Name or path of the Hugging Face tokenizer to use. If unspecified, model name
     or path will be used."""
-    tokenizer_mode: TokenizerMode = "auto"
+    tokenizer_mode: TokenizerMode | str = "auto"
     """Tokenizer mode:\n
     - "auto" will use "hf" tokenizer if Mistral's tokenizer is not available.\n
     - "hf" will use the fast tokenizer if available.\n
     - "slow" will always use the slow tokenizer.\n
     - "mistral" will always use the tokenizer from `mistral_common`.\n
-    - "custom" will use --tokenizer to select the preregistered tokenizer."""
+    - Other custom values can be supported via plugins."""
     trust_remote_code: bool = False
     """Trust remote code (e.g., from HuggingFace) when downloading the model and
     tokenizer."""
@@ -708,9 +708,6 @@ def _task_to_convert(task: TaskOption) -> ConvertType:
             # can be correctly capped to sliding window size
             self.hf_text_config.sliding_window = None
 
-        if not self.skip_tokenizer_init:
-            self._verify_tokenizer_mode()
-
         # Avoid running try_verify_and_update_config multiple times
         self.config_updated = False
 
@@ -718,6 +715,10 @@ def _task_to_convert(task: TaskOption) -> ConvertType:
         self._verify_cuda_graph()
         self._verify_bnb_config()
 
+    @field_validator("tokenizer_mode", mode="after")
+    def _lowercase_tokenizer_mode(cls, tokenizer_mode: str) -> str:
+        return tokenizer_mode.lower()
+
     @field_validator("quantization", mode="before")
     @classmethod
     def validate_quantization_before(cls, value: Any) -> Any:
@@ -829,15 +830,6 @@ def _get_encoder_config(self):
             model, _ = split_remote_gguf(model)
         return get_sentence_transformer_tokenizer_config(model, self.revision)
 
-    def _verify_tokenizer_mode(self) -> None:
-        tokenizer_mode = cast(TokenizerMode, self.tokenizer_mode.lower())
-        if tokenizer_mode not in get_args(TokenizerMode):
-            raise ValueError(
-                f"Unknown tokenizer mode: {self.tokenizer_mode}. Must be "
-                f"one of {get_args(TokenizerMode)}."
-            )
-        self.tokenizer_mode = tokenizer_mode
-
     def _get_default_runner_type(
         self,
         architectures: list[str],
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 186a2a414187..96b1b971552c 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -360,7 +360,7 @@ class EngineArgs:
     task: TaskOption | None = ModelConfig.task
     skip_tokenizer_init: bool = ModelConfig.skip_tokenizer_init
     enable_prompt_embeds: bool = ModelConfig.enable_prompt_embeds
-    tokenizer_mode: TokenizerMode = ModelConfig.tokenizer_mode
+    tokenizer_mode: TokenizerMode | str = ModelConfig.tokenizer_mode
     trust_remote_code: bool = ModelConfig.trust_remote_code
     allowed_local_media_path: str = ModelConfig.allowed_local_media_path
     allowed_media_domains: list[str] | None = ModelConfig.allowed_media_domains
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index acdf28501cbb..f005605c08d7 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -188,7 +188,7 @@ def __init__(
         runner: RunnerOption = "auto",
         convert: ConvertOption = "auto",
         tokenizer: str | None = None,
-        tokenizer_mode: TokenizerMode = "auto",
+        tokenizer_mode: TokenizerMode | str = "auto",
         skip_tokenizer_init: bool = False,
         trust_remote_code: bool = False,
         allowed_local_media_path: str = "",
diff --git a/vllm/tokenizers/__init__.py b/vllm/tokenizers/__init__.py
index 03174872146a..14f0148cf7ba 100644
--- a/vllm/tokenizers/__init__.py
+++ b/vllm/tokenizers/__init__.py
@@ -4,6 +4,12 @@
 from .hf import HfTokenizer
 from .mistral import MistralTokenizer
 from .protocol import TokenizerLike
-from .registry import TokenizerRegistry
+from .registry import TokenizerRegistry, get_tokenizer
 
-__all__ = ["TokenizerLike", "HfTokenizer", "MistralTokenizer", "TokenizerRegistry"]
+__all__ = [
+    "TokenizerLike",
+    "HfTokenizer",
+    "MistralTokenizer",
+    "TokenizerRegistry",
+    "get_tokenizer",
+]
diff --git a/vllm/tokenizers/hf.py b/vllm/tokenizers/hf.py
index 64672fdbb120..344507312038 100644
--- a/vllm/tokenizers/hf.py
+++ b/vllm/tokenizers/hf.py
@@ -10,6 +10,7 @@
 from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config
 
 from .protocol import TokenizerLike
+from .registry import TokenizerRegistry
 
 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
@@ -67,6 +68,7 @@ def __reduce__(self):
         return cached_tokenizer  # type: ignore
 
 
+@TokenizerRegistry.register("hf")
 class HfTokenizer(TokenizerLike):
     @classmethod
     def from_pretrained(
diff --git a/vllm/tokenizers/mistral.py b/vllm/tokenizers/mistral.py
index de3e5ec43854..7e6745004b01 100644
--- a/vllm/tokenizers/mistral.py
+++ b/vllm/tokenizers/mistral.py
@@ -6,6 +6,7 @@
 from vllm.logger import init_logger
 
 from .protocol import TokenizerLike
+from .registry import TokenizerRegistry
 
 if TYPE_CHECKING:
     from mistral_common.protocol.instruct.request import (
@@ -165,6 +166,7 @@ def _tekken_token_to_id(tokenizer: "Tekkenizer", t: str | bytes) -> int:
         return tokenizer.unk_id
 
 
+@TokenizerRegistry.register("mistral")
 class MistralTokenizer(TokenizerLike):
     @classmethod
     def from_pretrained(
diff --git a/vllm/tokenizers/registry.py b/vllm/tokenizers/registry.py
index 3a236c99b356..d5e789932161 100644
--- a/vllm/tokenizers/registry.py
+++ b/vllm/tokenizers/registry.py
@@ -1,28 +1,197 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import importlib
+import importlib.util
+from collections.abc import Callable
+from pathlib import Path
+from typing import TypeVar, overload
+
+import huggingface_hub
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
+from vllm.transformers_utils.repo_utils import list_filtered_repo_files
+from vllm.transformers_utils.utils import (
+    check_gguf_file,
+    is_gguf,
+    is_remote_gguf,
+    split_remote_gguf,
+)
+from vllm.utils.import_utils import resolve_obj_by_qualname
 
 from .protocol import TokenizerLike
 
+logger = init_logger(__name__)
+
+_T = TypeVar("_T", bound=type[TokenizerLike])
+
 
 class TokenizerRegistry:
-    # Tokenizer name -> (tokenizer module, tokenizer class)
-    REGISTRY: dict[str, tuple[str, str]] = {}
+    # Tokenizer name -> tokenizer_cls or (tokenizer module, tokenizer class)
+    REGISTRY: dict[str, type[TokenizerLike] | tuple[str, str]] = {}
 
+    # In-tree tokenizers
     @staticmethod
-    def register(name: str, module: str, class_name: str) -> None:
-        TokenizerRegistry.REGISTRY[name] = (module, class_name)
+    @overload
+    def register(tokenizer_mode: str) -> Callable[[_T], _T]: ...
 
+    # OOT tokenizers
     @staticmethod
-    def get_tokenizer(
-        tokenizer_name: str,
-        *args,
-        **kwargs,
-    ) -> "TokenizerLike":
-        tokenizer_cls = TokenizerRegistry.REGISTRY.get(tokenizer_name)
-        if tokenizer_cls is None:
-            raise ValueError(f"Tokenizer {tokenizer_name} not found.")
+    @overload
+    def register(tokenizer_mode: str, module: str, class_name: str) -> None: ...
+
+    @staticmethod
+    def register(
+        tokenizer_mode: str,
+        module: str | None = None,
+        class_name: str | None = None,
+    ) -> Callable[[_T], _T] | None:
+        # In-tree tokenizers
+        if module is None or class_name is None:
+
+            def wrapper(tokenizer_cls: _T) -> _T:
+                assert tokenizer_mode not in TokenizerRegistry.REGISTRY
+                TokenizerRegistry.REGISTRY[tokenizer_mode] = tokenizer_cls
+
+                return tokenizer_cls
+
+            return wrapper
+
+        # OOT tokenizers
+        if tokenizer_mode in TokenizerRegistry.REGISTRY:
+            logger.warning(
+                "%s.%s is already registered for tokenizer_mode=%r. "
+                "It is overwritten by the new one.",
+                module,
+                class_name,
+                tokenizer_mode,
+            )
+
+        TokenizerRegistry.REGISTRY[tokenizer_mode] = (module, class_name)
+
+        return None
+
+    @staticmethod
+    def get_tokenizer(tokenizer_mode: str, *args, **kwargs) -> "TokenizerLike":
+        if tokenizer_mode not in TokenizerRegistry.REGISTRY:
+            raise ValueError(f"No tokenizer registered for {tokenizer_mode=!r}.")
+
+        item = TokenizerRegistry.REGISTRY[tokenizer_mode]
+        if isinstance(item, type):
+            return item.from_pretrained(*args, **kwargs)
+
+        module, class_name = item
+        logger.debug_once(f"Loading {class_name} for {tokenizer_mode=!r}")
 
-        tokenizer_module = importlib.import_module(tokenizer_cls[0])
-        class_ = getattr(tokenizer_module, tokenizer_cls[1])
+        class_ = resolve_obj_by_qualname(f"{module}.{class_name}")
         return class_.from_pretrained(*args, **kwargs)
+
+
+def get_tokenizer(
+    tokenizer_name: str | Path,
+    *args,
+    tokenizer_mode: str = "auto",
+    trust_remote_code: bool = False,
+    revision: str | None = None,
+    download_dir: str | None = None,
+    **kwargs,
+) -> TokenizerLike:
+    """Gets a tokenizer for the given model name via HuggingFace or ModelScope."""
+    if envs.VLLM_USE_MODELSCOPE:
+        # download model from ModelScope hub,
+        # lazy import so that modelscope is not required for normal use.
+        from modelscope.hub.snapshot_download import snapshot_download
+
+        # avoid circular import
+        from vllm.model_executor.model_loader.weight_utils import get_lock
+
+        # Only set the tokenizer here, model will be downloaded on the workers.
+        if not Path(tokenizer_name).exists():
+            # Use file lock to prevent multiple processes from
+            # downloading the same file at the same time.
+            with get_lock(tokenizer_name, download_dir):
+                tokenizer_path = snapshot_download(
+                    model_id=str(tokenizer_name),
+                    cache_dir=download_dir,
+                    revision=revision,
+                    local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
+                    # Ignore weights - we only need the tokenizer.
+                    ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"],
+                )
+                tokenizer_name = tokenizer_path
+
+    if tokenizer_mode == "slow":
+        if kwargs.get("use_fast", False):
+            raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.")
+
+        tokenizer_mode = "hf"
+        kwargs["use_fast"] = False
+
+    if "truncation_side" not in kwargs:
+        kwargs["truncation_side"] = "left"
+
+    # Separate model folder from file path for GGUF models
+    if is_gguf(tokenizer_name):
+        if check_gguf_file(tokenizer_name):
+            kwargs["gguf_file"] = Path(tokenizer_name).name
+            tokenizer_name = Path(tokenizer_name).parent
+        elif is_remote_gguf(tokenizer_name):
+            tokenizer_name, quant_type = split_remote_gguf(tokenizer_name)
+            # Get the HuggingFace Hub path for the GGUF file
+            gguf_file = get_gguf_file_path_from_hf(
+                tokenizer_name,
+                quant_type,
+                revision=revision,
+            )
+            kwargs["gguf_file"] = gguf_file
+
+    # Try to use official Mistral tokenizer if possible
+    if tokenizer_mode == "auto" and importlib.util.find_spec("mistral_common"):
+        allow_patterns = ["tekken.json", "tokenizer.model.v*"]
+        files_list = list_filtered_repo_files(
+            model_name_or_path=str(tokenizer_name),
+            allow_patterns=allow_patterns,
+            revision=revision,
+        )
+        if len(files_list) > 0:
+            tokenizer_mode = "mistral"
+
+    # Fallback to HF tokenizer
+    if tokenizer_mode == "auto":
+        tokenizer_mode = "hf"
+
+    tokenizer_args = (tokenizer_name, *args)
+    tokenizer_kwargs = dict(
+        trust_remote_code=trust_remote_code,
+        revision=revision,
+        download_dir=download_dir,
+        **kwargs,
+    )
+
+    if tokenizer_mode == "custom":
+        logger.warning_once(
+            "TokenizerRegistry now uses `tokenizer_mode` as the registry key "
+            "instead of `tokenizer_name`. "
+            "Please update the definition of `.from_pretrained` in "
+            "your custom tokenizer to accept `args=%s`, `kwargs=%s`. "
+            "Then, you can pass `tokenizer_mode=%r` instead of "
+            "`tokenizer_mode='custom'` when initializing vLLM.",
+            tokenizer_args,
+            str(tokenizer_kwargs),
+            tokenizer_mode,
+        )
+
+        tokenizer_mode = str(tokenizer_name)
+
+    tokenizer = TokenizerRegistry.get_tokenizer(
+        tokenizer_mode,
+        *tokenizer_args,
+        **tokenizer_kwargs,
+    )
+
+    if not tokenizer.is_fast:
+        logger.warning(
+            "Using a slow tokenizer. This might cause a significant "
+            "slowdown. Consider using a fast tokenizer instead."
+        )
+
+    return tokenizer
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 622d5c7fe993..0911848c02e1 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -1,28 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import importlib.util
-import os
 import warnings
 from functools import lru_cache
-from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
-import huggingface_hub
 from typing_extensions import assert_never
 
-from vllm import envs
 from vllm.logger import init_logger
-from vllm.tokenizers import (
-    HfTokenizer,
-    MistralTokenizer,
-    TokenizerLike,
-    TokenizerRegistry,
-)
-
-from .gguf_utils import get_gguf_file_path_from_hf
-from .repo_utils import list_filtered_repo_files
-from .utils import check_gguf_file, is_gguf, is_remote_gguf, split_remote_gguf
+from vllm.tokenizers import TokenizerLike, get_tokenizer
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig
@@ -108,117 +94,6 @@ def encode_tokens(
     return tokenizer.encode(text, **kw_args)
 
 
-def get_tokenizer(
-    tokenizer_name: str | Path,
-    *args,
-    tokenizer_mode: str = "auto",
-    trust_remote_code: bool = False,
-    revision: str | None = None,
-    download_dir: str | None = None,
-    **kwargs,
-) -> TokenizerLike:
-    """Gets a tokenizer for the given model name via HuggingFace or ModelScope."""
-    if envs.VLLM_USE_MODELSCOPE:
-        # download model from ModelScope hub,
-        # lazy import so that modelscope is not required for normal use.
-        # pylint: disable=C.
-        from modelscope.hub.snapshot_download import snapshot_download
-
-        # avoid circuit import
-        from vllm.model_executor.model_loader.weight_utils import get_lock
-
-        # Only set the tokenizer here, model will be downloaded on the workers.
-        if not os.path.exists(tokenizer_name):
-            # Use file lock to prevent multiple processes from
-            # downloading the same file at the same time.
-            with get_lock(tokenizer_name, download_dir):
-                tokenizer_path = snapshot_download(
-                    model_id=tokenizer_name,
-                    cache_dir=download_dir,
-                    revision=revision,
-                    local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
-                    # Ignore weights - we only need the tokenizer.
-                    ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"],
-                )
-                tokenizer_name = tokenizer_path
-
-    if tokenizer_mode == "slow":
-        if kwargs.get("use_fast", False):
-            raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.")
-        kwargs["use_fast"] = False
-
-    if "truncation_side" not in kwargs:
-        kwargs["truncation_side"] = "left"
-
-    # Separate model folder from file path for GGUF models
-    if is_gguf(tokenizer_name):
-        if check_gguf_file(tokenizer_name):
-            kwargs["gguf_file"] = Path(tokenizer_name).name
-            tokenizer_name = Path(tokenizer_name).parent
-        elif is_remote_gguf(tokenizer_name):
-            tokenizer_name, quant_type = split_remote_gguf(tokenizer_name)
-            # Get the HuggingFace Hub path for the GGUF file
-            gguf_file = get_gguf_file_path_from_hf(
-                tokenizer_name,
-                quant_type,
-                revision=revision,
-            )
-            kwargs["gguf_file"] = gguf_file
-
-    # if `tokenizer_mode` == "auto", check if tokenizer can be loaded via Mistral format
-    # first to use official Mistral tokenizer if possible.
-    mistral_common_installed = importlib.util.find_spec("mistral_common") is not None
-    if tokenizer_mode == "auto" and mistral_common_installed:
-        allow_patterns = ["tekken.json", "tokenizer.model.v*"]
-        files_list = list_filtered_repo_files(
-            model_name_or_path=str(tokenizer_name),
-            allow_patterns=allow_patterns,
-            revision=revision,
-        )
-        if len(files_list) > 0:
-            tokenizer_mode = "mistral"
-
-    tokenizer: TokenizerLike
-    if tokenizer_mode == "mistral":
-        logger.debug_once(f"Loading MistralTokenizer from {tokenizer_name}")
-        tokenizer = MistralTokenizer.from_pretrained(
-            tokenizer_name,
-            *args,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            download_dir=download_dir,
-            **kwargs,
-        )
-    elif tokenizer_mode == "custom":
-        logger.debug_once(f"Loading CustomTokenizer from {tokenizer_name}")
-        tokenizer = TokenizerRegistry.get_tokenizer(
-            str(tokenizer_name),
-            *args,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            download_dir=download_dir,
-            **kwargs,
-        )
-    else:
-        logger.debug_once(f"Loading HfTokenizer from {tokenizer_name}")
-        tokenizer = HfTokenizer.from_pretrained(
-            tokenizer_name,
-            *args,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            download_dir=download_dir,
-            **kwargs,
-        )
-
-    if not tokenizer.is_fast:
-        logger.warning(
-            "Using a slow tokenizer. This might cause a significant "
-            "slowdown. Consider using a fast tokenizer instead."
-        )
-
-    return tokenizer
-
-
 cached_get_tokenizer = lru_cache(get_tokenizer)
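Usage sketch (not part of the patch): after this change, the registry is keyed by `tokenizer_mode` rather than by the tokenizer name, mirroring the updated tests/tokenizers_/test_registry.py above. The plugin module and class names below are hypothetical placeholders, assuming `MyTokenizer` implements the `TokenizerLike` protocol.

    from vllm.tokenizers import TokenizerRegistry, get_tokenizer

    # Register an out-of-tree tokenizer under a custom mode name.
    # The class is resolved lazily from "my_pkg.tokenization.MyTokenizer"
    # the first time this mode is requested.
    TokenizerRegistry.register("my_tokenizer", "my_pkg.tokenization", "MyTokenizer")

    # The mode selects the registered class; the positional argument is the
    # name/path forwarded to MyTokenizer.from_pretrained().
    tokenizer = get_tokenizer("my-org/my-model", tokenizer_mode="my_tokenizer")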