@@ -1397,7 +1397,11 @@ def quantize_via_rtn_blockwise(self, all_to_quantized_module_names: list[str]) -
1397
1397
if self .device_map is not None :
1398
1398
accelerate .hooks .remove_hook_from_submodules (block )
1399
1399
1400
- if is_nv_fp (self .act_data_type ) and any ("nv_fp" in format_ for format_ in self .formats ):
1400
+ if (
1401
+ hasattr (self , "formats" )
1402
+ and is_nv_fp (self .act_data_type )
1403
+ and any ("nv_fp" in format_ for format_ in self .formats )
1404
+ ):
1401
1405
from auto_round .utils import set_amax_for_all_moe_layers
1402
1406
1403
1407
# enable automatic act_max generation for the linear layers of MoE experts
@@ -3022,14 +3026,19 @@ def _generate_block_recipe(self, block, input_ids, input_others):
3022
3026
3023
3027
# fetch mix-precision recipe configuration
3024
3028
sample_num = self .recipe_mp_config .get ("sample_num" , 8 )
3025
- mp_ratio = self .recipe_mp_config .get ("mp_ratio" , 1 / 7 )
3029
+ mp_ratio = self .recipe_mp_config .get ("mp_ratio" , 1 / 3 )
3026
3030
loss_weight = float (self .recipe_mp_config .get ("loss_weight" , 2.0 ))
3027
3031
numel_weight = float (self .recipe_mp_config .get ("numel_weight" , 1.0 ))
3028
3032
loss_numel_ratio = loss_weight / numel_weight
3029
3033
3030
3034
# calculate the number of layers to use mix-precision
3031
3035
quantizable_layers = [n for n , m in block .named_modules () if isinstance (m , SUPPORTED_LAYER_TYPES )]
3036
+ mp_ratio_list = [f"{ i } /{ len (quantizable_layers )} " for i in range (1 , len (quantizable_layers ))]
3032
3037
quantizable_num = int (mp_ratio * len (quantizable_layers )) # int() truncates, so this rounds down (floor), not up
3038
+ logger .warning_once (
3039
+ f"[Recipe Mode] { len (quantizable_layers )} layers are detected, so the available mp_ratio values are { mp_ratio_list } "
3040
+ )
3041
+ logger .warning_once (f"[Recipe Mode] { quantizable_num } layers of each block use the mixed precision." )
3033
3042
# fetch raw low-bits dtype of block for recovering mix-precision block
3034
3043
layer = get_module (block , quantizable_layers [0 ])
3035
3044
raw_dtype = {
@@ -3103,7 +3112,7 @@ def get_loss(q_block):
3103
3112
logger .debug (f"{ hp_layers } , { loss } , { numel } " )
3104
3113
3105
3114
hp_layers = get_best_combination (combination_list , numel_list , loss_list , loss_numel_ratio )
3106
- logger .info (f"final hp layers: { hp_layers } " )
3115
+ logger .info (f"[Recipe Mode] Mix precision layers in this block : { hp_layers } " )
3107
3116
return hp_layers
3108
3117
3109
3118
def _dump_average_bits (self , layer_config = None ):
0 commit comments