14 changes: 8 additions & 6 deletions tests/models/multimodal/processing/test_common.py
@@ -22,8 +22,11 @@
 from vllm.multimodal.cache import MultiModalProcessorOnlyCache
 from vllm.multimodal.inputs import MultiModalInputs
 from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
-from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
-from vllm.transformers_utils.tokenizer import encode_tokens
+from vllm.tokenizers import (
+    MistralTokenizer,
+    TokenizerLike,
+    cached_tokenizer_from_config,
+)

 from ....multimodal.utils import random_audio, random_image, random_video
 from ...registry import (
@@ -151,7 +154,7 @@ def get_text_token_prompts(
     mm_data: MultiModalDataDict,
 ):
     dummy_inputs = processor.dummy_inputs
-    tokenizer = processor.info.get_tokenizer()
+    tokenizer: TokenizerLike = processor.info.get_tokenizer()
     model_config = processor.info.ctx.model_config

     model_type = model_config.hf_config.model_type
@@ -188,10 +191,9 @@ def get_text_token_prompts(
     assert isinstance(inputs.prompt, str)

     text_prompt = inputs.prompt
-    token_prompt = encode_tokens(
-        tokenizer,
+    token_prompt = tokenizer.encode(
         text_prompt,
-        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
+        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
     )

     return text_prompt, token_prompt
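Note on the `add_special_tokens` change above: the removed `encode_tokens` helper treated a missing override (`None`) as "use the tokenizer's default", so the explicit `True` fallback keeps the behaviour unchanged now that `tokenizer.encode` receives a concrete bool. A minimal sketch of the pattern, with a hypothetical override table and an illustrative tokenizer (not the real test fixtures):

```python
from transformers import AutoTokenizer

# Hypothetical override table; only listed model types deviate from the default.
_ADD_SPECIAL_TOKENS_OVERRIDES = {"mllama": False}

def encode_prompt(tokenizer, text_prompt: str, model_type: str) -> list[int]:
    # Model types without an override fall back to True, i.e. the tokenizer's
    # normal behaviour of adding BOS/CLS-style special tokens.
    return tokenizer.encode(
        text_prompt,
        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
    )

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative tokenizer
print(encode_prompt(tokenizer, "a photo of a cat", "llava"))   # includes [CLS]/[SEP]
print(encode_prompt(tokenizer, "a photo of a cat", "mllama"))  # raw token IDs only
```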
3 changes: 1 addition & 2 deletions tests/models/multimodal/processing/test_llama4.py
@@ -5,7 +5,6 @@
 import pytest

 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import encode_tokens

 from ....conftest import ImageTestAssets
 from ...utils import build_model_context
@@ -48,7 +47,7 @@ def test_processor_override(
         ]
     }
     if tokenized_prompt:
-        prompt = encode_tokens(tokenizer, prompt)
+        prompt = tokenizer.encode(prompt)

     processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
     mm_data = processed_inputs["mm_kwargs"].get_data()
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/speech_to_text.py
@@ -37,7 +37,7 @@
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.import_utils import PlaceholderModule

 try:
4 changes: 2 additions & 2 deletions vllm/entrypoints/renderer.py
@@ -33,7 +33,7 @@ class RenderConfig:
     `0` yields an empty list (and skips embeds).
     `-1` maps to `model_config.max_model_len`."""

-    add_special_tokens: bool | None = True
+    add_special_tokens: bool = True
     """Whether to add model-specific special tokens during tokenization."""

     cache_salt: str | None = None
@@ -315,7 +315,7 @@ async def _create_prompt_from_text(
         text: str,
         max_length: int | None,
         truncate_prompt_tokens: int | None,
-        add_special_tokens: bool | None,
+        add_special_tokens: bool,
         cache_salt: str | None,
     ) -> EngineTokensPrompt:
         """Tokenize text input asynchronously."""
2 changes: 1 addition & 1 deletion vllm/entrypoints/score_utils.py
@@ -19,7 +19,7 @@
 from vllm.model_executor.models.interfaces import supports_score_template
 from vllm.multimodal.inputs import MultiModalDataDict
 from vllm.outputs import PoolingRequestOutput
-from vllm.transformers_utils.tokenizer import TokenizerLike
+from vllm.tokenizers import TokenizerLike

 ScoreContentPartParam: TypeAlias = (
     ChatCompletionContentPartImageParam | ChatCompletionContentPartImageEmbedsParam
25 changes: 10 additions & 15 deletions vllm/model_executor/models/nano_nemotron_vl.py
@@ -75,7 +75,6 @@
 from vllm.sequence import IntermediateTensors
 from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .utils import _merge_multimodal_embeddings
@@ -454,14 +453,12 @@ def __init__(

         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )

     @property
@@ -1179,14 +1176,12 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
         tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
-        )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
         )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
         )

     def pixel_shuffle(self, x, scale_factor=0.5):
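The pattern kept by both hunks above is tokenize-once, reuse-per-frame: each marker string is encoded a single time at construction and the cached IDs are spliced into every frame's placeholder sequence. A minimal sketch under assumed marker strings (the real IMG_START/IMG_END/IMG_CONTEXT constants live elsewhere in nano_nemotron_vl.py):

```python
from transformers import AutoTokenizer

# Illustrative marker strings; the real constants are defined in the model file.
IMG_START, IMG_END, IMG_CONTEXT = "<img>", "</img>", "<image>"

class VideoPromptBuilder:
    def __init__(self, tokenizer) -> None:
        # Encode each marker exactly once so building prompts for many video
        # frames does not call the tokenizer repeatedly for the same strings.
        self._img_start_ids = tokenizer.encode(IMG_START, add_special_tokens=False)
        self._img_end_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
        self._img_context_ids = tokenizer.encode(IMG_CONTEXT, add_special_tokens=False)

    def frame_token_ids(self, tokens_per_frame: int) -> list[int]:
        # <img> + N context placeholders + </img>, assembled from cached IDs.
        return (
            self._img_start_ids
            + self._img_context_ids * tokens_per_frame
            + self._img_end_ids
        )

builder = VideoPromptBuilder(AutoTokenizer.from_pretrained("gpt2"))  # illustrative tokenizer
print(len(builder.frame_token_ids(16)))
```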
3 changes: 1 addition & 2 deletions vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -88,7 +88,6 @@
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .interfaces import (
@@ -591,7 +590,7 @@ def _apply_hf_processor_main(
                 tokenization_kwargs=tokenization_kwargs,
             )
             tokenizer = self.info.get_tokenizer()
-            prompt_ids = encode_tokens(tokenizer, prompt)
+            prompt_ids = tokenizer.encode(prompt)
         else:
             prompt_ids = self._apply_hf_processor_tokens_only(prompt)

19 changes: 8 additions & 11 deletions vllm/multimodal/processing.py
@@ -25,7 +25,6 @@
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.transformers_utils.processor import cached_processor_from_config
-from vllm.transformers_utils.tokenizer import decode_tokens, encode_tokens
 from vllm.utils.collection_utils import flatten_2d_lists, full_groupby
 from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
 from vllm.utils.jsontree import JSONTree, json_map_leaves
@@ -80,21 +79,19 @@ def _cached_encode(
     tokenizer: TokenizerLike,
     text: str,
     *,
-    add_special_tokens: bool | None = None,
+    add_special_tokens: bool = True,
 ) -> list[int]:
-    return encode_tokens(tokenizer, text, add_special_tokens=add_special_tokens)
+    return tokenizer.encode(text, add_special_tokens=add_special_tokens)


 @lru_cache(maxsize=2048)
 def _cached_decode(
     tokenizer: TokenizerLike,
     token_ids: tuple[int, ...],
     *,
-    skip_special_tokens: bool | None = None,
+    skip_special_tokens: bool = False,
 ) -> str:
-    return decode_tokens(
-        tokenizer, list(token_ids), skip_special_tokens=skip_special_tokens
-    )
+    return tokenizer.decode(list(token_ids), skip_special_tokens=skip_special_tokens)


 def _seq2text(
@@ -110,7 +107,7 @@ def _seq2text(
         raise ValueError("You cannot decode tokens when `skip_tokenizer_init=True`")

     if not use_cache:
-        return decode_tokens(tokenizer, seq)
+        return tokenizer.decode(seq)

     return _cached_decode(tokenizer, tuple(seq))

Expand All @@ -126,7 +123,7 @@ def _seq2tokens(
raise ValueError("You cannot encode text when `skip_tokenizer_init=True`")

if not use_cache:
return encode_tokens(tokenizer, seq, add_special_tokens=False)
return tokenizer.encode(seq, add_special_tokens=False)

return _cached_encode(tokenizer, seq, add_special_tokens=False)

@@ -2198,8 +2195,8 @@ def _get_enc_dec_inputs(
         tokenizer = self.info.get_tokenizer()
         decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data)
         if isinstance(decoder_prompt_raw, str):
-            decoder_prompt_ids = encode_tokens(
-                tokenizer, decoder_prompt_raw, add_special_tokens=False
+            decoder_prompt_ids = tokenizer.encode(
+                decoder_prompt_raw, add_special_tokens=False
             )
         else:
             decoder_prompt_ids = decoder_prompt_raw
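Side note on the cached helpers above: `lru_cache` keys on argument hashes, which is why the decode path takes a tuple of token IDs and converts back to a list only when calling the tokenizer. A self-contained sketch of the same idea, simplified so the tokenizer is captured from the enclosing scope rather than passed as an argument as in the PR's helpers (the tokenizer itself is illustrative):

```python
from functools import lru_cache

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer

@lru_cache(maxsize=2048)
def cached_decode(token_ids: tuple[int, ...], *, skip_special_tokens: bool = False) -> str:
    # lru_cache keys on argument hashes, so token_ids must be a tuple;
    # the tokenizer accepts any sequence, hence the conversion back to a list.
    return tokenizer.decode(list(token_ids), skip_special_tokens=skip_special_tokens)

ids = tokenizer.encode("hello world", add_special_tokens=False)
print(cached_decode(tuple(ids)))  # first call decodes and fills the cache
print(cached_decode(tuple(ids)))  # second call is served from the cache
```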
4 changes: 4 additions & 0 deletions vllm/transformers_utils/tokenizer.py
@@ -4,6 +4,8 @@
 import warnings
 from typing import Any

+from typing_extensions import deprecated
+
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike

@@ -73,6 +75,7 @@ def __getattr__(name: str):
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


+@deprecated("Will be removed in v0.13. Please use `tokenizer.decode()` instead.")
 def decode_tokens(
     tokenizer: TokenizerLike,
     token_ids: list[int],
@@ -94,6 +97,7 @@ def decode_tokens(
     return tokenizer.decode(token_ids, **kw_args)


+@deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
 def encode_tokens(
     tokenizer: TokenizerLike,
     text: str,
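With both helpers now marked `@deprecated`, downstream call sites can drop the module-level wrappers and call the tokenizer methods directly, as the deprecation messages suggest. A hedged migration sketch (the model name is illustrative):

```python
from vllm.tokenizers import get_tokenizer

tokenizer = get_tokenizer("facebook/opt-125m")  # illustrative model name

# Before (deprecated, scheduled for removal in v0.13):
#   from vllm.transformers_utils.tokenizer import encode_tokens, decode_tokens
#   ids = encode_tokens(tokenizer, "hello world", add_special_tokens=False)
#   text = decode_tokens(tokenizer, ids, skip_special_tokens=True)

# After: call the tokenizer directly.
ids = tokenizer.encode("hello world", add_special_tokens=False)
text = tokenizer.decode(ids, skip_special_tokens=True)
print(ids, text)
```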