[NVIDIA#6530][fix] Fix script when using calibration tensors from modelopt (NVIDIA#6803)

achartier · web-flow · commit 2e0081b53ecb · 2025-08-12T20:41:10.000-07:00
Signed-off-by: Aurelien Chartier <2567591+achartier@users.noreply.github.com>
diff --git a/examples/quantization/quantize_mixed_precision_moe.py b/examples/quantization/quantize_mixed_precision_moe.py
@@ -45,10 +45,16 @@ def load_and_preprocess_state_dict(modelopt_state_root, world_size=8):
     state_dict_list = []
     # load amax from state dict
     for rank in range(world_size):
-        state_dict_list.append(
-            torch.load(
-                f"{modelopt_state_root}/amax_dict_rank{rank}-mp{world_size}.pt",
-                map_location="cuda:0"))
+        amax_file = f"{modelopt_state_root}/amax_dict_rank{rank}-mp{world_size}.pt"
+        if os.path.exists(amax_file):
+            state_dict_list.append(torch.load(amax_file, map_location="cuda:0"))
+        else:
+            print(f"WARNING: amax file not found: {amax_file}")
+
+    if not state_dict_list:
+        print("ERROR: No amax files loaded!")
+        return {}
+
     # calculate the max across all TP ranks
     merged_state_dict = state_dict_list[0]
     for rank in range(world_size):
@@ -232,15 +238,18 @@ def get_file_name(layer):
                 continue
             new_safetensors.update({key: get_tensor(key)})
 
+    # Process activation scales for all ranks
+    if os.path.isdir(args.act_scales):
+        # Extract activation scales
+        renamed_state_dict = load_and_preprocess_state_dict(
+            modelopt_state_root=args.act_scales, world_size=8)
+        scales = get_scales_from_amax(start_layer=start_layer,
+                                      end_layer=end_layer,
+                                      renamed_state_dict=renamed_state_dict)
+        new_safetensors.update(scales)
+
     if args.rank == 0:
-        if os.path.isdir(args.act_scales):
-            # Extract activation scales
-            renamed_state_dict = load_and_preprocess_state_dict(
-                modelopt_state_root=args.act_scales, world_size=8)
-            get_scales_from_amax(start_layer=start_layer,
-                                 end_layer=end_layer,
-                                 renamed_state_dict=renamed_state_dict)
-        else:
+        if not os.path.isdir(args.act_scales):
             input_scales = safe_open(args.act_scales, "pt")
             for k in input_scales.keys():
                 new_safetensors.update({k: input_scales.get_tensor(k)})
@@ -259,7 +268,10 @@ def get_file_name(layer):
         ]
         for name in names:
             shutil.copy(os.path.join(model_dir, name), output_dir)
-        shutil.copy(args.act_scales, output_dir)
+        if os.path.isdir(args.act_scales):
+            shutil.copytree(args.act_scales, output_dir, dirs_exist_ok=True)
+        else:
+            shutil.copy(args.act_scales, output_dir)
 
         # config.json
         del config['quantization_config']