
Commit 79323d6

add more info in code and ut
Signed-off-by: xinhe3 <[email protected]>
1 parent 62f81e5 commit 79323d6

File tree

auto_round/autoround.py
test/test_cpu/test_recipe.py

2 files changed: +23 -4 lines changed


auto_round/autoround.py

Lines changed: 12 additions & 3 deletions
@@ -1397,7 +1397,11 @@ def quantize_via_rtn_blockwise(self, all_to_quantized_module_names: list[str]) -
         if self.device_map is not None:
             accelerate.hooks.remove_hook_from_submodules(block)
 
-        if is_nv_fp(self.act_data_type) and any("nv_fp" in format_ for format_ in self.formats):
+        if (
+            hasattr(self, "formats")
+            and is_nv_fp(self.act_data_type)
+            and any("nv_fp" in format_ for format_ in self.formats)
+        ):
             from auto_round.utils import set_amax_for_all_moe_layers
 
             # enable moe experts act_max automatic generation for linears

@@ -3022,14 +3026,19 @@ def _generate_block_recipe(self, block, input_ids, input_others):
 
         # fetch mix-precision recipe configuration
         sample_num = self.recipe_mp_config.get("sample_num", 8)
-        mp_ratio = self.recipe_mp_config.get("mp_ratio", 1 / 7)
+        mp_ratio = self.recipe_mp_config.get("mp_ratio", 1 / 3)
         loss_weight = float(self.recipe_mp_config.get("loss_weight", 2.0))
         numel_weight = float(self.recipe_mp_config.get("numel_weight", 1.0))
         loss_numel_ratio = loss_weight / numel_weight
 
         # calculate the number of layers to use mix-precision
         quantizable_layers = [n for n, m in block.named_modules() if isinstance(m, SUPPORTED_LAYER_TYPES)]
+        mp_ratio_list = [f"{i}/{len(quantizable_layers)}" for i in range(1, len(quantizable_layers))]
         quantizable_num = int(mp_ratio * len(quantizable_layers))  # It's ceiling
+        logger.warning_once(
+            f"[Recipe Mode] {len(quantizable_layers)} layers are detected, so the available mp_ratio values are {mp_ratio_list}"
+        )
+        logger.warning_once(f"[Recipe Mode] {quantizable_num} layers of each block use the mixed precision.")
         # fetch raw low-bits dtype of block for recovering mix-precision block
         layer = get_module(block, quantizable_layers[0])
         raw_dtype = {

@@ -3103,7 +3112,7 @@ def get_loss(q_block):
             logger.debug(f"{hp_layers}, {loss}, {numel}")
 
         hp_layers = get_best_combination(combination_list, numel_list, loss_list, loss_numel_ratio)
-        logger.info(f"final hp layers: {hp_layers}")
+        logger.info(f"[Recipe Mode] Mix precision layers in this block: {hp_layers}")
         return hp_layers
 
     def _dump_average_bits(self, layer_config=None):
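
Note: the new warnings surface the arithmetic that maps mp_ratio onto a per-block layer count. A minimal sketch of that relationship, assuming a hypothetical block with 8 quantizable layers (the layer count and ratio below are illustrative, not taken from this commit):

# Mirrors the recipe-mode arithmetic above; layer count and ratio are hypothetical.
num_layers = 8                 # stands in for len(quantizable_layers) of one block
mp_ratio = 1 / 4               # requested share of mixed-precision layers

# Ratios that correspond to a whole number of layers in this block
mp_ratio_list = [f"{i}/{num_layers}" for i in range(1, num_layers)]
print(mp_ratio_list)           # ['1/8', '2/8', ..., '7/8']

# Number of layers per block that receive the mixed-precision dtype
quantizable_num = int(mp_ratio * num_layers)
print(quantizable_num)         # 2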

test/test_cpu/test_recipe.py

Lines changed: 11 additions & 1 deletion
@@ -52,7 +52,17 @@ def test_recipe_api(self):
             seqlen=2,
             dataset=self.llm_dataloader,
         )
-        layer_config = autoround._generate_recipe()
+        layer_config = autoround._generate_recipe(
+            mp_dtype={
+                "data_type": "mx_fp8",
+                "act_data_type": "mx_fp8",
+            },
+            mp_config={
+                "mp_ratio": 1 / 3,
+                "loss_weight": 2.0,
+                "numel_weight": 1.0,
+            },
+        )
         autoround.layer_config = layer_config
         autoround.quantize()
         # autoround.quantize_and_save() # save is not supported for mix-precision
