fix bug and add nvfp in alg-ext with slight improvement (#794)

wenhuach21 · web-flow · commit 7e014ca38b5a · 2025-09-05T17:40:08.000+08:00
diff --git a/README.md b/README.md
@@ -28,6 +28,9 @@ and [fbaldassarri](https://huggingface.co/fbaldassarri). Please check out [User
 
 ## 🆕 What's New
 
+[2025/09] AutoRound now includes experimental support for the mxfp4 and nvfp4 dtypes. For accuracy results, see the [documentation](./docs/mxnv_acc.md).
+We currently recommend exporting to the LLM-Compressor format.
+
 [2025/08] AutoRound now provides experimental support for an improved INT2 algorithm via `--enable_alg_ext`. See this [documentation](./docs/alg_202508.md)
  for some accuracy results. 
 
diff --git a/auto_round/alg_ext.cpython-310-x86_64-linux-gnu.so b/auto_round/alg_ext.cpython-310-x86_64-linux-gnu.so
diff --git a/auto_round/autoround.py b/auto_round/autoround.py
@@ -2885,16 +2885,21 @@ def _quantize_blocks(
             self.sym
             and self.enable_alg_ext
             and self.super_group_size is None
-            and ((self.data_type.startswith("int") and self.act_bits >= 8) or self.data_type.startswith("mx"))
+            and (
+                (self.data_type.startswith("int") and self.act_bits >= 8)
+                or self.data_type.startswith("mx")
+                or self.data_type.startswith("nv")
+            )
         ):
             try:
                 from auto_round.alg_ext import quantize_block_ext
 
                 AutoRound.quantize_block_ext = quantize_block_ext
                 quantize_block = self.quantize_block_ext  # must use self.quantize_block_ext
-                if self.bits > 2 and not self.data_type.startswith("mx"):
+                if self.bits > 2 and (not self.data_type.startswith("mx") or not self.data_type.startswith("nv")):
                     logger.warning(
-                        "algorithm extension has only undergone limited validation on INT2 and mxfp4; use with caution."
+                        "algorithm extension has only undergone limited validation on "
+                        "INT2,mxfp4 and nvfp4; use with caution."
                     )
                 else:
                     logger.info("using algorithm extension for quantization.")
diff --git a/docs/mxnv_acc.md b/docs/mxnv_acc.md
@@ -0,0 +1,15 @@
+Average accuracy across hellaswag, lambada_openai, mmlu, piqa, and winogrande.
+
+We evaluated using a fake model since we currently have no access to devices for running the real models. However, we have verified that in most cases the fake model closely matches the real model.
+
+| mxfp4 g32         | llama3.1-8B-Instruct | Qwen2.5-7B-Instruct | Phi4    | Qwen3-32B |
+|-------------------|----------------------|---------------------|---------|-----------|
+| RTN               | 0.62124              | 0.65502             | 0.71674 | 0.69006   |
+| AutoRound         | 0.66862              | 0.67588             | 0.72472 | 0.72106   |
+| AutoRound+alg_ext | 0.6732               | 0.68094             | 0.72252 | 0.72012   |
+
+| nvfp4 g16         | llama3.1-8B-Instruct | Qwen2.5-7B-Instruct | Phi4    | Qwen3-32B |
+|-------------------|----------------------|---------------------|---------|-----------|
+| RTN               | 0.68756              | 0.6906              | 0.72962 | 0.71636   |
+| AutoRound         | 0.69184              | 0.69728             | 0.73058 | 0.73062   |
+| AutoRound+alg_ext | 0.69648              | 0.6989              | 0.7318  |           |