diff --git a/examples/multimodal_vision/README.md b/examples/multimodal_vision/README.md
index c0d0808b4..e8a2e0c9a 100644
--- a/examples/multimodal_vision/README.md
+++ b/examples/multimodal_vision/README.md
@@ -37,7 +37,7 @@ recipe = [
         targets="Linear",
         scheme="W4A16",
         sequential_targets=["MistralDecoderLayer"],
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 ```
diff --git a/examples/multimodal_vision/llama4_example.py b/examples/multimodal_vision/llama4_example.py
index 292fa8300..d93df605a 100644
--- a/examples/multimodal_vision/llama4_example.py
+++ b/examples/multimodal_vision/llama4_example.py
@@ -52,9 +52,11 @@ def preprocess_function(example):
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        )
         for key, value in batch[0].items()
     }
 
@@ -67,8 +69,8 @@ def data_collator(batch):
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )
diff --git a/examples/multimodal_vision/llava_example.py b/examples/multimodal_vision/llava_example.py
index 0673fed9b..da0f71218 100644
--- a/examples/multimodal_vision/llava_example.py
+++ b/examples/multimodal_vision/llava_example.py
@@ -30,7 +30,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 
diff --git a/examples/multimodal_vision/mistral3_example.py b/examples/multimodal_vision/mistral3_example.py
index b7281dadd..6f9567cf1 100644
--- a/examples/multimodal_vision/mistral3_example.py
+++ b/examples/multimodal_vision/mistral3_example.py
@@ -31,9 +31,11 @@
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=model.dtype)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=model.dtype)
+        )
         for key, value in batch[0].items()
     }
 
@@ -43,7 +45,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 
diff --git a/examples/multimodal_vision/mllama_example.py b/examples/multimodal_vision/mllama_example.py
index c54bb27a4..edc7bc91f 100644
--- a/examples/multimodal_vision/mllama_example.py
+++ b/examples/multimodal_vision/mllama_example.py
@@ -30,7 +30,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
+        ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_model.*"],
     ),
 ]
 
diff --git a/examples/multimodal_vision/pixtral_example.py b/examples/multimodal_vision/pixtral_example.py
index b86b90411..3ce58629a 100644
--- a/examples/multimodal_vision/pixtral_example.py
+++ b/examples/multimodal_vision/pixtral_example.py
@@ -36,7 +36,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 
diff --git a/examples/quantization_w4a4_fp4/llama4_example.py b/examples/quantization_w4a4_fp4/llama4_example.py
index 28b57dda9..ec476f1f0 100644
--- a/examples/quantization_w4a4_fp4/llama4_example.py
+++ b/examples/quantization_w4a4_fp4/llama4_example.py
@@ -52,9 +52,11 @@ def preprocess_function(example):
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        )
         for key, value in batch[0].items()
     }
 
@@ -67,8 +69,8 @@ def data_collator(batch):
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )
diff --git a/examples/quantization_w8a8_fp8/llama3.2_vision_example.py b/examples/quantization_w8a8_fp8/llama3.2_vision_example.py
index 9cd24387f..7871a73e9 100644
--- a/examples/quantization_w8a8_fp8/llama3.2_vision_example.py
+++ b/examples/quantization_w8a8_fp8/llama3.2_vision_example.py
@@ -17,7 +17,7 @@
 recipe = QuantizationModifier(
     targets="Linear",
     scheme="FP8_DYNAMIC",
-    ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
+    ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_model.*"],
 )
 
 # Apply quantization and save to disk in compressed-tensors format.
diff --git a/examples/quantization_w8a8_fp8/llama4_fp8_block_example.py b/examples/quantization_w8a8_fp8/llama4_fp8_block_example.py
index d28e02716..6d34d8e98 100644
--- a/examples/quantization_w8a8_fp8/llama4_fp8_block_example.py
+++ b/examples/quantization_w8a8_fp8/llama4_fp8_block_example.py
@@ -22,8 +22,8 @@
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )
diff --git a/examples/quantization_w8a8_fp8/llava1.5_example.py b/examples/quantization_w8a8_fp8/llava1.5_example.py
index 9bce0ba42..9c9849254 100644
--- a/examples/quantization_w8a8_fp8/llava1.5_example.py
+++ b/examples/quantization_w8a8_fp8/llava1.5_example.py
@@ -17,7 +17,7 @@
 recipe = QuantizationModifier(
     targets="Linear",
     scheme="FP8_DYNAMIC",
-    ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_tower.*"],
+    ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_tower.*"],
 )
 
 # Apply quantization and save to disk in compressed-tensors format.
diff --git a/tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml index 07fc0256e..c70fd0b79 100644 --- a/tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml +++ b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml @@ -1,7 +1,7 @@ quant_stage: quant_modifiers: QuantizationModifier: - ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*", "re:model.visual.*"] + ignore: ["lm_head", "re:vision_tower.*", "re:.*multi_modal_projector.*", "re:.*visual.*", "re:.*vision_model.*"] config_groups: group_0: weights: diff --git a/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml b/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml index c316150a5..c174963e4 100644 --- a/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml +++ b/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml @@ -3,7 +3,7 @@ quant_stage: SmoothQuantModifier: smoothing_strength: 0.8 GPTQModifier: - ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*", "re:model.visual.*"] + ignore: ["lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*", "re:.*visual.*", "re:.*vision_model.*"] actorder: null config_groups: group_0: diff --git a/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml b/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml index fa14b2175..964fdf4c0 100644 --- a/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml +++ b/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml @@ -1,7 +1,7 @@ quant_stage: quant_modifiers: GPTQModifier: - ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*", "re:model.visual.*"] + ignore: ["lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*", "re:.*visual.*", "re:.*vision_model.*"] actorder: "weight" config_groups: group_0:
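The same reasoning applies to the e2e test recipes above. To sanity-check which modules a given ignore list actually excludes before running a quantization pass, one could enumerate them directly. The helper below is hypothetical (not an llm-compressor or compressed-tensors API); it assumes `re:`-prefixed entries are matched with `re.match` against fully qualified module names and treats plain entries as a module or class name.

```python
import re

import torch


def resolve_ignored_linears(model: torch.nn.Module, ignore: list[str]) -> list[str]:
    """Hypothetical helper: return names of Linear submodules skipped by `ignore`."""
    skipped = []
    for name, module in model.named_modules():
        if not isinstance(module, torch.nn.Linear):
            continue
        for entry in ignore:
            if entry.startswith("re:"):
                matched = re.match(entry[len("re:"):], name) is not None
            else:
                matched = name.endswith(entry) or module.__class__.__name__ == entry
            if matched:
                skipped.append(name)
                break
    return skipped


# Example (assuming `model` is an already-loaded multimodal model):
# skipped = resolve_ignored_linears(
#     model, ["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"]
# )
# print(f"{len(skipped)} Linear modules would be left unquantized")
```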