Commit b5aeafa

Deprecate old QAT APIs
**Summary:** Deprecates QAT APIs that should no longer be used and prints a helpful deprecation warning to help users migrate.

**Test Plan:**
```
python test/quantization/test_qat.py -k test_qat_api_deprecation
```

Also manual testing:
```
'IntXQuantizationAwareTrainingConfig' is deprecated and will be removed in a future release. Please use the following API instead:

    base_config = Int8DynamicActivationInt4WeightConfig(group_size=32)
    quantize_(model, QATConfig(base_config, step="prepare"))
    # train (not shown)
    quantize_(model, QATConfig(base_config, step="convert"))

Alternatively, if you prefer to pass in fake quantization configs:

    activation_config = IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
    weight_config = IntxFakeQuantizeConfig(torch.int4, group_size=32)
    qat_config = QATConfig(
        activation_config=activation_config,
        weight_config=weight_config,
        step="prepare",
    )
    quantize_(model, qat_config)

Please see #2630 for more details.

IntXQuantizationAwareTrainingConfig(activation_config=None, weight_config=None)
```

ghstack-source-id: 7ac9f3b
Pull Request resolved: #2641
1 parent 7f5a6e4 commit b5aeafa

File tree: 5 files changed, +134 −11 lines changed

docs/source/api_ref_qat.rst

Lines changed: 1 addition & 3 deletions

```diff
@@ -32,15 +32,13 @@ Custom QAT APIs
     linear.enable_linear_fake_quant
     linear.disable_linear_fake_quant
 
-Legacy QAT APIs
+Legacy QAT Quantizers
 ---------------------
 
 .. autosummary::
     :toctree: generated/
     :nosignatures:
 
-    IntXQuantizationAwareTrainingConfig
-    FromIntXQuantizationAwareTrainingConfig
     Int4WeightOnlyQATQuantizer
     linear.Int4WeightOnlyQATLinear
     Int8DynActInt4WeightQATQuantizer
```

test/quantization/test_qat.py

Lines changed: 60 additions & 0 deletions

```diff
@@ -8,6 +8,8 @@
 # This test takes a long time to run
 
 import copy
+import io
+import logging
 import unittest
 from typing import List
 
@@ -1844,6 +1846,64 @@ def test_legacy_quantize_api_e2e(self):
         baseline_out = baseline_model(*x2)
         torch.testing.assert_close(out, baseline_out, atol=0, rtol=0)
 
+    def _test_deprecation(self, deprecated_class, *example_args, first_time=True):
+        """
+        Assert that instantiating a deprecated class triggers the deprecation warning.
+        """
+        try:
+            log_stream = io.StringIO()
+            handler = logging.StreamHandler(log_stream)
+            logger = logging.getLogger(deprecated_class.__module__)
+            logger.addHandler(handler)
+            logger.setLevel(logging.WARN)
+            deprecated_class(*example_args)
+            if first_time:
+                regex = (
+                    "'%s' is deprecated and will be removed in a future release"
+                    % deprecated_class.__name__
+                )
+                self.assertIn(regex, log_stream.getvalue())
+            else:
+                self.assertEqual(log_stream.getvalue(), "")
+        finally:
+            logger.removeHandler(handler)
+            handler.close()
+
+    @unittest.skipIf(
+        not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower"
+    )
+    def test_qat_api_deprecation(self):
+        """
+        Test that the appropriate deprecation warning has been logged.
+        """
+        from torchao.quantization.qat import (
+            FakeQuantizeConfig,
+            from_intx_quantization_aware_training,
+            intx_quantization_aware_training,
+        )
+        from torchao.quantization.qat.utils import _LOGGED_DEPRECATED_CLASSES
+
+        # Reset deprecation warning state, otherwise we won't log warnings here
+        _LOGGED_DEPRECATED_CLASSES.clear()
+
+        # Assert that the deprecation warning is logged
+        self._test_deprecation(IntXQuantizationAwareTrainingConfig)
+        self._test_deprecation(FromIntXQuantizationAwareTrainingConfig)
+        self._test_deprecation(intx_quantization_aware_training)
+        self._test_deprecation(from_intx_quantization_aware_training)
+        self._test_deprecation(FakeQuantizeConfig, torch.int8, "per_channel")
+
+        # Assert that warning is only logged once per class
+        self._test_deprecation(IntXQuantizationAwareTrainingConfig, first_time=False)
+        self._test_deprecation(
+            FromIntXQuantizationAwareTrainingConfig, first_time=False
+        )
+        self._test_deprecation(intx_quantization_aware_training, first_time=False)
+        self._test_deprecation(from_intx_quantization_aware_training, first_time=False)
+        self._test_deprecation(
+            FakeQuantizeConfig, torch.int8, "per_channel", first_time=False
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
```

torchao/quantization/qat/api.py

Lines changed: 15 additions & 7 deletions

```diff
@@ -24,6 +24,7 @@
     _infer_fake_quantize_configs,
 )
 from .linear import FakeQuantizedLinear
+from .utils import _log_deprecation_warning
 
 
 class QATStep(str, Enum):
@@ -224,11 +225,11 @@ def _qat_config_transform(
         return _QUANTIZE_CONFIG_HANDLER[type(base_config)](module, base_config)
 
 
-# TODO: deprecate
 @dataclass
 class IntXQuantizationAwareTrainingConfig(AOBaseConfig):
     """
-    (Will be deprecated soon)
+    (Deprecated) Please use :class:`~torchao.quantization.qat.QATConfig` instead.
+
     Config for applying fake quantization to a `torch.nn.Module`.
     to be used with :func:`~torchao.quantization.quant_api.quantize_`.
 
@@ -256,9 +257,13 @@ class IntXQuantizationAwareTrainingConfig(AOBaseConfig):
     activation_config: Optional[FakeQuantizeConfigBase] = None
     weight_config: Optional[FakeQuantizeConfigBase] = None
 
+    def __post_init__(self):
+        _log_deprecation_warning(self)
+
 
 # for BC
-intx_quantization_aware_training = IntXQuantizationAwareTrainingConfig
+class intx_quantization_aware_training(IntXQuantizationAwareTrainingConfig):
+    pass
 
 
 @register_quantize_module_handler(IntXQuantizationAwareTrainingConfig)
@@ -286,10 +291,11 @@ def _intx_quantization_aware_training_transform(
     raise ValueError("Module of type '%s' does not have QAT support" % type(mod))
 
 
-# TODO: deprecate
+@dataclass
 class FromIntXQuantizationAwareTrainingConfig(AOBaseConfig):
     """
-    (Will be deprecated soon)
+    (Deprecated) Please use :class:`~torchao.quantization.qat.QATConfig` instead.
+
     Config for converting a model with fake quantized modules,
     such as :func:`~torchao.quantization.qat.linear.FakeQuantizedLinear`
     and :func:`~torchao.quantization.qat.linear.FakeQuantizedEmbedding`,
@@ -306,11 +312,13 @@ class FromIntXQuantizationAwareTrainingConfig(AOBaseConfig):
     )
     """
 
-    pass
+    def __post_init__(self):
+        _log_deprecation_warning(self)
 
 
 # for BC
-from_intx_quantization_aware_training = FromIntXQuantizationAwareTrainingConfig
+class from_intx_quantization_aware_training(FromIntXQuantizationAwareTrainingConfig):
+    pass
 
 
 @register_quantize_module_handler(FromIntXQuantizationAwareTrainingConfig)
```
torchao/quantization/qat/fake_quantize_config.py

Lines changed: 17 additions & 1 deletion

```diff
@@ -25,6 +25,8 @@
     ZeroPointDomain,
 )
 
+from .utils import _log_deprecation_warning
+
 
 class FakeQuantizeConfigBase(abc.ABC):
     """
@@ -134,6 +136,14 @@ def __init__(
         if is_dynamic and range_learning:
             raise ValueError("`is_dynamic` is not compatible with `range_learning`")
 
+        self.__post_init__()
+
+    def __post_init__(self):
+        """
+        For deprecation only, can remove after https://github.com/pytorch/ao/issues/2630.
+        """
+        pass
+
     def _get_granularity(
         self,
         granularity: Union[Granularity, str, None],
@@ -260,7 +270,13 @@ def __setattr__(self, name: str, value: Any):
 
 
 # For BC
-FakeQuantizeConfig = IntxFakeQuantizeConfig
+class FakeQuantizeConfig(IntxFakeQuantizeConfig):
+    """
+    (Deprecated) Please use :class:`~torchao.quantization.qat.IntxFakeQuantizeConfig` instead.
+    """
+
+    def __post_init__(self):
+        _log_deprecation_warning(self)
 
 
 def _infer_fake_quantize_configs(
```
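
The `__post_init__` hook above is a small inversion-of-control pattern: the supported class calls a no-op hook at the end of `__init__`, and only the deprecated subclass overrides it to log. A standalone sketch of the pattern, with hypothetical `NewConfig`/`OldConfig` names standing in for `IntxFakeQuantizeConfig`/`FakeQuantizeConfig`:

```python
import logging


def _warn_deprecated(obj):
    # Log under the object's module, mirroring _log_deprecation_warning
    logging.getLogger(obj.__module__).warning(
        "'%s' is deprecated", obj.__class__.__name__
    )


class NewConfig:
    def __init__(self, bits: int):
        self.bits = bits
        self.__post_init__()  # hook runs after the object is fully constructed

    def __post_init__(self):
        pass  # no-op in the supported class


class OldConfig(NewConfig):
    def __post_init__(self):
        _warn_deprecated(self)


NewConfig(8)  # silent
OldConfig(8)  # logs: 'OldConfig' is deprecated
```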

torchao/quantization/qat/utils.py

Lines changed: 41 additions & 0 deletions

```diff
@@ -4,6 +4,8 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
+import logging
+from typing import Any
 
 import torch
 
@@ -104,3 +106,42 @@ def _get_qmin_qmax(n_bit: int, symmetric: bool = True):
         qmin = 0
         qmax = 2**n_bit - 1
     return (qmin, qmax)
+
+
+# log deprecation warning only once per class
+_LOGGED_DEPRECATED_CLASSES = set[type]()
+
+
+def _log_deprecation_warning(old_api_object: Any):
+    """
+    Log a helpful deprecation message pointing users to the new QAT API,
+    only once per deprecated class.
+    """
+    global _LOGGED_DEPRECATED_CLASSES
+    if old_api_object.__class__ in _LOGGED_DEPRECATED_CLASSES:
+        return
+    _LOGGED_DEPRECATED_CLASSES.add(old_api_object.__class__)
+    logger = logging.getLogger(old_api_object.__module__)
+    logger.warning(
+        """'%s' is deprecated and will be removed in a future release. Please use the following API instead:
+
+    base_config = Int8DynamicActivationInt4WeightConfig(group_size=32)
+    quantize_(model, QATConfig(base_config, step="prepare"))
+    # train (not shown)
+    quantize_(model, QATConfig(base_config, step="convert"))
+
+Alternatively, if you prefer to pass in fake quantization configs:
+
+    activation_config = IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
+    weight_config = IntxFakeQuantizeConfig(torch.int4, group_size=32)
+    qat_config = QATConfig(
+        activation_config=activation_config,
+        weight_config=weight_config,
+        step="prepare",
+    )
+    quantize_(model, qat_config)
+
+Please see https://github.com/pytorch/ao/issues/2630 for more details.
+"""
+        % old_api_object.__class__.__name__
+    )
```
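
Because the guard keys on `old_api_object.__class__`, each deprecated API warns at most once per process; this is exactly what the test above asserts. A sketch of the observable behavior, assuming a default logging setup that surfaces warnings:

```python
from torchao.quantization.qat import IntXQuantizationAwareTrainingConfig

IntXQuantizationAwareTrainingConfig()  # logs the migration message once
IntXQuantizationAwareTrainingConfig()  # silent: class is in _LOGGED_DEPRECATED_CLASSES
```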
