File tree Expand file tree Collapse file tree 2 files changed +3
-0
lines changed Expand file tree Collapse file tree 2 files changed +3
-0
lines changed Original file line number Diff line number Diff line change @@ -89,3 +89,4 @@ torch.distributed.barrier()
8989
1. AWQ is not yet supported in vLLM.
2. PTQ/QAT checkpoint doesn't work with KV Cache quantization enabled.
3. Mixed precision checkpoint doesn't work currently.
Original file line number Diff line number Diff line change @@ -582,6 +582,8 @@ def export_hf_checkpoint(
dtype: the weights data type to export the unquantized layers or the default model data type if None.
export_dir: the target export path.
save_modelopt_state: whether to save the modelopt state_dict.
export_bf16_weights_amax: whether to export the bf16 weights and amax values separately. This can be
    used for vLLM fakequant serving.
585587 """
586588 export_dir = Path (export_dir )
587589 export_dir .mkdir (parents = True , exist_ok = True )
You can’t perform that action at this time.
0 commit comments