vllm-project · brian-dellabetta · Oct 1, 2025 · Sep 26, 2025 · Sep 29, 2025 · Sep 29, 2025
diff --git a/examples/multimodal_vision/README.md b/examples/multimodal_vision/README.md
@@ -37,7 +37,7 @@ recipe = [
         targets="Linear",
         scheme="W4A16",
         sequential_targets=["MistralDecoderLayer"],
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 ```

diff --git a/examples/multimodal_vision/llama4_example.py b/examples/multimodal_vision/llama4_example.py
@@ -52,9 +52,11 @@ def preprocess_function(example):
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        )
         for key, value in batch[0].items()
     }
 
@@ -67,8 +69,8 @@ def data_collator(batch):
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )

diff --git a/examples/multimodal_vision/llava_example.py b/examples/multimodal_vision/llava_example.py
@@ -30,7 +30,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 

diff --git a/examples/multimodal_vision/mistral3_example.py b/examples/multimodal_vision/mistral3_example.py
@@ -31,9 +31,11 @@
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=model.dtype)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=model.dtype)
+        )
         for key, value in batch[0].items()
     }
 
@@ -43,7 +45,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 

diff --git a/examples/multimodal_vision/mllama_example.py b/examples/multimodal_vision/mllama_example.py
@@ -30,7 +30,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
+        ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_model.*"],
     ),
 ]
 

diff --git a/examples/multimodal_vision/pixtral_example.py b/examples/multimodal_vision/pixtral_example.py
@@ -36,7 +36,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 

diff --git a/examples/quantization_w4a4_fp4/llama4_example.py b/examples/quantization_w4a4_fp4/llama4_example.py
@@ -52,9 +52,11 @@ def preprocess_function(example):
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        )
         for key, value in batch[0].items()
     }
 
@@ -67,8 +69,8 @@ def data_collator(batch):
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )

diff --git a/examples/quantization_w8a8_fp8/llama3.2_vision_example.py b/examples/quantization_w8a8_fp8/llama3.2_vision_example.py
@@ -17,7 +17,7 @@
 recipe = QuantizationModifier(
     targets="Linear",
     scheme="FP8_DYNAMIC",
-    ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
+    ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_model.*"],
 )
 
 # Apply quantization and save to disk in compressed-tensors format.

diff --git a/examples/quantization_w8a8_fp8/llama4_fp8_block_example.py b/examples/quantization_w8a8_fp8/llama4_fp8_block_example.py
@@ -22,8 +22,8 @@
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )

diff --git a/examples/quantization_w8a8_fp8/llava1.5_example.py b/examples/quantization_w8a8_fp8/llava1.5_example.py
@@ -17,7 +17,7 @@
 recipe = QuantizationModifier(
     targets="Linear",
     scheme="FP8_DYNAMIC",
-    ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_tower.*"],
+    ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_tower.*"],
 )
 
 # Apply quantization and save to disk in compressed-tensors format.

diff --git a/tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml
@@ -1,7 +1,7 @@
 quant_stage:
   quant_modifiers:
     QuantizationModifier:
-      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*", "re:model.visual.*"]
+      ignore: ["lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*", "re:.*visual.*", "re:.*vision_model.*"]
       config_groups:
         group_0:
           weights:

diff --git a/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml b/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml
@@ -3,7 +3,7 @@ quant_stage:
     SmoothQuantModifier:
       smoothing_strength: 0.8
     GPTQModifier:
-      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*", "re:model.visual.*"]
+      ignore: ["lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*", "re:.*visual.*", "re:.*vision_model.*"]
       actorder: null
       config_groups:
         group_0:

diff --git a/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml b/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml
@@ -1,7 +1,7 @@
 quant_stage:
   quant_modifiers:
     GPTQModifier:
-      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*", "re:model.visual.*"]
+      ignore: ["lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*", "re:.*visual.*", "re:.*vision_model.*"]
       actorder: "weight"
       config_groups:
         group_0: