
Commit d210561

minor updates

Signed-off-by: realAsma <[email protected]>

1 parent 0275c61

2 files changed: +11 additions, -8 deletions


modelopt/torch/quantization/algorithms.py

Lines changed: 11 additions & 6 deletions
@@ -737,11 +737,19 @@ def register_custom_support(
         grad_ckpt_context: Callable,
         is_param_grad_enabled: Callable,
     ) -> None:
-        """Register custom support for `AutoQuantize` score estimation.
+        """(Optional) Register custom support for `AutoQuantize` score estimation.
+
+        This custom support enables memory- and compute-efficient backward gradient propagation. It involves:
+            - `grad_ckpt_context`: runs the backward pass with gradient checkpointing enabled.
+            - `is_param_grad_enabled`: AutoQuantize only needs activation gradients to be computed (not weight
+              gradients), so `is_param_grad_enabled` selects which parameters have gradients enabled, limiting
+              gradient computation to what is needed for activation gradients. For LLMs, enabling just the
+              embedding layer's weight gradient is sufficient: it triggers computation of all downstream
+              activation gradients.

         If the `is_supported_checker(model)` returns True, the `grad_ckpt_context(model)` will be
         used to enable gradient checkpointing and `is_param_grad_enabled(pname, model)`
-        will be used to enable gradient for the parameter.
+        will be used to select which parameters have gradients enabled to minimize gradient computation.
         """
         cls.custom_support.append((is_supported_checker, grad_ckpt_context, is_param_grad_enabled))
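
For context, a minimal sketch of how a caller might register such custom support. The helper names below and the commented-out registration call are hypothetical; only the `register_custom_support` argument order is taken from the tuple appended in the diff above:

from contextlib import contextmanager

def _is_supported(model) -> bool:
    # Hypothetical checker: handle models that expose HF-style gradient
    # checkpointing toggles and an embedding layer.
    return hasattr(model, "gradient_checkpointing_enable") and hasattr(model, "get_input_embeddings")

def _grad_ckpt_context(model):
    # Hypothetical helper: a context manager that turns gradient checkpointing
    # on for the duration of the backward pass and restores it afterwards.
    @contextmanager
    def _ctx():
        model.gradient_checkpointing_enable()
        try:
            yield
        finally:
            model.gradient_checkpointing_disable()
    return _ctx()

def _is_param_grad_enabled(pname: str, model) -> bool:
    # Enable gradients only for embedding weights; per the docstring above, this
    # is enough to make all downstream activation gradients available.
    return "embed" in pname

# Assuming the class shown in the diff (the AutoQuantize searcher) is importable:
# AutoQuantizeSearcher.register_custom_support(
#     _is_supported, _grad_ckpt_context, _is_param_grad_enabled
# )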

@@ -793,10 +801,7 @@ def auto_quantize_score_estimate_forward(module, input, *args, **kwargs):
                 output_diff -= output
                 module.output_diff_dict[hparam][recipe] = output_diff.detach()

-        # Disable the configurable hparam so that they do not affect the any
-        # other hparam's score estimation
-        for hparam in module._hparams_for_scoring:
-            if hparam.is_configurable:
+                # Disable the configurable hparam now that we have computed the diff
                 hparam.active = QuantRecipe(quant_cfg=None)

         return output
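
The docstring added above claims that, for LLMs, enabling the weight gradient of just the embedding layer is enough for activation gradients to flow through every downstream layer. A small self-contained sanity check of that idea on a toy model (illustrative only; none of these names are ModelOpt API):

import torch
import torch.nn as nn

# Toy "LLM": an embedding followed by a small feed-forward stack.
model = nn.Sequential(
    nn.Embedding(100, 16),
    nn.Linear(16, 16),
    nn.ReLU(),
    nn.Linear(16, 16),
)

# Freeze everything except the embedding weight, mimicking what
# is_param_grad_enabled would select.
for name, param in model.named_parameters():
    param.requires_grad = name.startswith("0.")  # "0." is the embedding module

tokens = torch.randint(0, 100, (2, 8))
hidden = model[0](tokens)          # embedding output, requires_grad=True
out = hidden
for layer in list(model)[1:]:
    out = layer(out)

# Capture the gradient of an intermediate activation even though the
# linear layers' own weights have requires_grad=False.
grads = {}
hidden.register_hook(lambda g: grads.update(embedding_output=g))
out.sum().backward()
assert "embedding_output" in grads

Only the embedding weight is a trainable leaf here, yet the hook on the intermediate activation still receives a gradient; that is the property the custom support exploits to keep AutoQuantize's backward passes cheap.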

tests/unit/torch/quantization/plugins/test_huggingface.py

Lines changed: 0 additions & 2 deletions
@@ -160,8 +160,6 @@ def test_autoquantize_huggingface():
         verbose=True,
     )

-    print(search_history, model)
-

 @pytest.mark.parametrize(
     ("model_cls", "quant_config"),
