Arm backend: Make per-channel quantization default for VgfPipeline (#12705)

YufengShi-dudu · web-flow · commit b9c31e5be14f · 2025-07-23T15:40:14.000+02:00
Signed-off-by: Yufeng Shi <yufeng.shi@arm.com>
diff --git a/backends/arm/test/ops/test_multihead_attention.py b/backends/arm/test/ops/test_multihead_attention.py
@@ -11,6 +11,7 @@
     EthosU85PipelineBI,
     TosaPipelineBI,
     TosaPipelineMI,
+    VgfPipeline,
 )
 
 
@@ -105,3 +106,39 @@ def test_multihead_attention_u85_BI(test_data: input_t1):
         per_channel_quantization=False,
     )
     pipeline.run()
+
+
+@common.parametrize(
+    "test_data",
+    test_suite,
+)
+@common.SkipIfNoModelConverter
+def test_multihead_attention_vgf_FP(test_data: input_t1):
+    test_data_vals, module = test_data()
+    pipeline = VgfPipeline[input_t1](
+        module,
+        (*test_data_vals, *test_data_vals, *test_data_vals),
+        [],
+        [],
+        tosa_version="TOSA-1.0+FP",
+    )
+    pipeline.run()
+
+
+@common.parametrize(
+    "test_data",
+    test_suite,
+)
+@common.SkipIfNoModelConverter
+def test_multihead_attention_vgf_INT(test_data: input_t1):
+    test_data_vals, module = test_data()
+    pipeline = VgfPipeline[input_t1](
+        module,
+        (*test_data_vals, *test_data_vals, *test_data_vals),
+        [],
+        [],
+        tosa_version="TOSA-1.0+INT",
+        # TODO: Per-channel quantization is broken (MLETORCH-1144)
+        per_channel_quantization=False,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
@@ -854,7 +854,7 @@ def __init__(
         vgf_compiler_flags: Optional[str] = "",
         tosa_version: str = "TOSA-1.0+FP",
         symmetric_io_quantization: bool = False,
-        per_channel_quantization: bool = False,
+        per_channel_quantization: bool = True,
         use_to_edge_transform_and_lower: bool = True,
         custom_path: str = None,
         atol: float = 1e-03,
@@ -866,11 +866,6 @@ def __init__(
         ] = None,
     ):
 
-        if (
-            symmetric_io_quantization or per_channel_quantization
-        ) and tosa_version == "TOSA-1.0+FP":
-            raise ValueError("Dont configure quantization with FP TOSA profile.")
-
         tosa_profile = TosaSpecification.create_from_string(tosa_version)
         compile_spec = common.get_vgf_compile_spec(
             tosa_profile, compiler_flags=vgf_compiler_flags, custom_path=custom_path
@@ -887,18 +882,15 @@ def __init__(
             transform_passes=transform_passes,
         )
 
-        if symmetric_io_quantization or per_channel_quantization:
+        if "INT" in tosa_version:
             quantizer = VgfQuantizer(compile_spec)
             quantization_config = get_symmetric_quantization_config(
                 is_per_channel=per_channel_quantization
             )
             if symmetric_io_quantization:
                 quantizer.set_io(quantization_config)
             quant_stage = Quantize(quantizer, quantization_config)
-        else:
-            quant_stage = None
 
-        if "INT" in tosa_version:
             self.add_stage(self.tester.quantize, quant_stage, pos=0)
 
             self.add_stage_after(