Commit 6fc5841

Fix some more Transformers nightly tests (#29872)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 3ff5b53 commit 6fc5841

8 files changed (+75, -27 lines)

examples/offline_inference/vision_language.py

Lines changed: 4 additions & 1 deletion

@@ -1801,7 +1801,10 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
-        hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]},
+        hf_overrides={
+            "architectures": ["Tarsier2ForConditionalGeneration"],
+            "model_type": "tarsier2",
+        },
         limit_mm_per_prompt={modality: 1},
     )
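
For anyone adapting this example: both override keys travel together. A minimal standalone sketch (engine arguments other than the overrides omitted; model name taken from tests/models/registry.py below):

    from vllm import EngineArgs

    engine_args = EngineArgs(
        model="omni-research/Tarsier2-Recap-7b",
        hf_overrides={
            "architectures": ["Tarsier2ForConditionalGeneration"],
            "model_type": "tarsier2",
        },
    )

Here "architectures" selects vLLM's model implementation, while the new "model_type" key routes config parsing to the Tarsier2Config class added later in this commit.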

examples/offline_inference/vision_language_multi_image.py

Lines changed: 4 additions & 1 deletion

@@ -1222,7 +1222,10 @@ def load_tarsier2(question: str, image_urls: list[str]) -> ModelRequestData:
         trust_remote_code=True,
         max_model_len=32768,
         limit_mm_per_prompt={"image": len(image_urls)},
-        hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]},
+        hf_overrides={
+            "architectures": ["Tarsier2ForConditionalGeneration"],
+            "model_type": "tarsier2",
+        },
     )

     prompt = (

tests/models/registry.py

Lines changed: 4 additions & 1 deletion

@@ -831,7 +831,10 @@ def check_available_online(
     "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b"),
     "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
         "omni-research/Tarsier2-Recap-7b",
-        hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]},
+        hf_overrides={
+            "architectures": ["Tarsier2ForConditionalGeneration"],
+            "model_type": "tarsier2",
+        },
     ),
     "VoxtralForConditionalGeneration": _HfExamplesInfo(
         "mistralai/Voxtral-Mini-3B-2507",

vllm/model_executor/models/qwen2_vl.py

Lines changed: 0 additions & 9 deletions

@@ -1576,15 +1576,6 @@ class Tarsier2ForConditionalGeneration(Qwen2VLForConditionalGeneration):
         }
     )

-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        # Tarsier2 uses llava as model_type, which will create a Qwen2VLConfig
-        # as text_config, we need to reconstruct Qwen2VLConfig from LlavaConfig.
-        config = vllm_config.model_config.hf_config
-        qwen2vl_config = config.text_config
-        qwen2vl_config.architectures = config.architectures
-        vllm_config.model_config.hf_config = qwen2vl_config
-        super().__init__(vllm_config=vllm_config, prefix=prefix)
-
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         skip_prefixes = []
         if self.visual is None:
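
This constructor-level workaround is superseded by the config-level fix later in this commit: once model_type resolves to the new Tarsier2Config, hf_config already arrives as a flat Qwen2VLConfig subclass and no reconstruction is needed. A minimal sketch of the invariant the deleted code used to establish by hand (assumes a vLLM build containing this commit):

    from transformers import Qwen2VLConfig

    from vllm.transformers_utils.configs import Tarsier2Config

    cfg = Tarsier2Config()
    assert isinstance(cfg, Qwen2VLConfig)  # already a flat Qwen2VL-style config
    assert cfg.model_type == "tarsier2"    # matches the new _CONFIG_REGISTRY key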

vllm/tokenizers/mistral.py

Lines changed: 20 additions & 8 deletions

@@ -14,13 +14,19 @@
 )
 from mistral_common.tokens.tokenizers.tekken import Tekkenizer
 from transformers import BatchEncoding
-from transformers.tokenization_mistral_common import (
-    MistralCommonTokenizer as TransformersMistralTokenizer,
-)

 from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest

+try:
+    # Transformers v5
+    from transformers.tokenization_mistral_common import MistralCommonBackend
+except ImportError:
+    # Transformers v4
+    from transformers.tokenization_mistral_common import (
+        MistralCommonTokenizer as MistralCommonBackend,
+    )
+
 logger = init_logger(__name__)

@@ -208,11 +214,17 @@ def from_pretrained(
         **kwargs,
     ) -> "MistralTokenizer":
         from mistral_common.protocol.instruct.validator import ValidationMode
-        from transformers.tokenization_mistral_common import (
-            MistralCommonTokenizer as TransformersMistralTokenizer,
-        )

-        tokenizer = TransformersMistralTokenizer.from_pretrained(
+        try:
+            # Transformers v5
+            from transformers.tokenization_mistral_common import MistralCommonBackend
+        except ImportError:
+            # Transformers v4
+            from transformers.tokenization_mistral_common import (
+                MistralCommonTokenizer as MistralCommonBackend,
+            )
+
+        tokenizer = MistralCommonBackend.from_pretrained(
             path_or_repo_id,
             *args,
             mode=ValidationMode.test,
@@ -223,7 +235,7 @@ def from_pretrained(

         return cls(tokenizer)

-    def __init__(self, tokenizer: "TransformersMistralTokenizer") -> None:
+    def __init__(self, tokenizer: "MistralCommonBackend") -> None:
         super().__init__()

         from mistral_common.protocol.instruct.validator import ValidationMode
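
Downstream call sites are unchanged by the aliasing; only the imported name differs per Transformers major version. Constructing vLLM's wrapper still looks like this (a sketch; the repo id is illustrative and must ship a mistral-common tokenizer):

    from vllm.tokenizers.mistral import MistralTokenizer

    tokenizer = MistralTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")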

vllm/transformers_utils/config.py

Lines changed: 17 additions & 7 deletions

@@ -89,6 +89,7 @@ def __getitem__(self, key):
     step3_text="Step3TextConfig",
     qwen3_next="Qwen3NextConfig",
     lfm2_moe="Lfm2MoeConfig",
+    tarsier2="Tarsier2Config",
 )

 _CONFIG_ATTRS_MAPPING: dict[str, str] = {
@@ -127,6 +128,9 @@ def parse(
             if config_dict.get("speculators_config") is not None
             else model_type
         )
+        # Allow hf_overrides to override model_type before checking _CONFIG_REGISTRY
+        if (hf_overrides := kwargs.pop("hf_overrides", None)) is not None:
+            model_type = hf_overrides.get("model_type", model_type)

         if model_type in _CONFIG_REGISTRY:
             config_class = _CONFIG_REGISTRY[model_type]
@@ -310,7 +314,7 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
         config.rope_parameters["rope_theta"] = rope_theta

     # No RoPE parameters to patch
-    if not hasattr(config, "rope_parameters"):
+    if getattr(config, "rope_parameters", None) is None:
         return

     # Add original_max_position_embeddings if present
@@ -351,7 +355,10 @@ def patch_rope_parameters_dict(rope_parameters: dict[str, Any]) -> None:
         rope_parameters["rope_type"] = "longrope"
         logger.warning("Replacing legacy rope_type 'su' with 'longrope'")
     elif rope_parameters["rope_type"] == "mrope":
-        assert "mrope_section" in rope_parameters
+        if "mrope_section" not in rope_parameters:
+            raise ValueError(
+                "Legacy rope_type 'mrope' requires 'mrope_section' in rope_parameters"
+            )
         rope_parameters["rope_type"] = "default"
         logger.warning("Replacing legacy rope_type 'mrope' with 'default'")
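
Illustrative use of the patched helper (a sketch; the mrope_section value is made up for the example):

    from vllm.transformers_utils.config import patch_rope_parameters_dict

    rope = {"rope_type": "mrope", "mrope_section": [16, 24, 24]}
    patch_rope_parameters_dict(rope)
    assert rope["rope_type"] == "default"  # legacy 'mrope' is rewritten in place

A dict with rope_type "mrope" but no "mrope_section" now raises a descriptive ValueError rather than an AssertionError, which also keeps the check alive under python -O (asserts are stripped when optimization is enabled).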

@@ -584,6 +591,7 @@ def get_config(
         trust_remote_code=trust_remote_code,
         revision=revision,
         code_revision=code_revision,
+        hf_overrides=hf_overrides_kw,
         **kwargs,
     )
     # Special architecture mapping check for GGUF models
@@ -915,11 +923,13 @@ def get_hf_text_config(config: PretrainedConfig):
     """
     text_config = config.get_text_config()

-    if text_config is not config:
-        # The code operates under the assumption that text_config should have
-        # `num_attention_heads` (among others). Assert here to fail early
-        # if transformers config doesn't align with this assumption.
-        assert hasattr(text_config, "num_attention_heads")
+    if text_config is not config and not hasattr(text_config, "num_attention_heads"):
+        raise ValueError(
+            "The text_config extracted from the model config does not have "
+            "`num_attention_heads` attribute. This indicates a mismatch "
+            "between the model config and vLLM's expectations. Please "
+            "ensure that the model config is compatible with vLLM."
+        )

     return text_config
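
Putting the hunks above together: get_config forwards the user's hf_overrides into parse(), where a "model_type" override wins before the _CONFIG_REGISTRY lookup. A condensed sketch of that flow (not the real call chain):

    hf_overrides = {"model_type": "tarsier2"}
    model_type = "llava"  # what Tarsier2's config.json actually reports

    # Mirrors the new logic in parse():
    if hf_overrides is not None:
        model_type = hf_overrides.get("model_type", model_type)

    assert model_type == "tarsier2"  # now hits the tarsier2 -> Tarsier2Config entry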

vllm/transformers_utils/configs/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -48,6 +48,7 @@
     Step3VisionEncoderConfig,
     Step3VLConfig,
 )
+from vllm.transformers_utils.configs.tarsier2 import Tarsier2Config
 from vllm.transformers_utils.configs.ultravox import UltravoxConfig

 __all__ = [
@@ -81,4 +82,5 @@
     "Step3VisionEncoderConfig",
     "Step3TextConfig",
     "Qwen3NextConfig",
+    "Tarsier2Config",
 ]
vllm/transformers_utils/configs/tarsier2.py (new file)

Lines changed: 24 additions & 0 deletions

@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from transformers import Qwen2VLConfig
+
+
+class Tarsier2Config(Qwen2VLConfig):
+    """
+    Tarsier2's config.json is written such that AutoConfig.from_pretrained will create
+    a deeply nested config consisting of:
+
+    - LlavaConfig
+        - Qwen2VLConfig
+            - Qwen2VLTextConfig
+            - Qwen2VLVisionConfig
+        - Qwen2VLConfig
+            - Qwen2VLTextConfig
+            - Qwen2VLVisionConfig
+
+    When it should really just be a single Qwen2VLConfig.
+
+    This class is a hack to stop AutoConfig from creating the nested config structure.
+    """
+
+    model_type = "tarsier2"
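
To see the fix end to end, a sketch (requires a vLLM build with this commit and access to the HF Hub; get_config's signature is simplified here, though hf_overrides_kw is the name visible in the get_config hunk above):

    from vllm.transformers_utils.config import get_config

    config = get_config(
        "omni-research/Tarsier2-Recap-7b",
        trust_remote_code=True,
        hf_overrides_kw={
            "architectures": ["Tarsier2ForConditionalGeneration"],
            "model_type": "tarsier2",
        },
    )
    print(type(config).__name__)  # expected: "Tarsier2Config", a flat Qwen2VLConfig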
