diff --git a/swift/megatron/convert.py b/swift/megatron/convert.py
index 2dc2263552..36f46eea75 100644
--- a/swift/megatron/convert.py
+++ b/swift/megatron/convert.py
@@ -1,6 +1,8 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import math
+import os
+import shutil
 from contextlib import contextmanager
 from dataclasses import fields
 from typing import Any, Dict
@@ -15,7 +17,7 @@ from megatron.training.initialize import initialize_megatron
 from swift.llm import ExportArguments, HfConfigFactory, prepare_model_template, to_device, to_float_dtype
-from swift.utils import get_logger, get_n_params_grads
+from swift.utils import get_logger, get_n_params_grads, is_master
 from .argument import MegatronArguments
 from .model import get_megatron_model_meta
 from .utils import (convert_hf_config, forward_step_helper, get_padding_to, patch_load_base_checkpoint,
@@ -332,6 +334,12 @@ def convert_mcore2hf(args: ExportArguments) -> None:
     bridge = megatron_model_meta.bridge_cls()
     logger.info('Converting weights and saving the model...')
     bridge.save_weights([mg_model], args.output_dir)
+    if is_master():
+        args_path = os.path.join(megatron_args.adapter_load or megatron_args.load or args.model, 'args.json')
+        if os.path.exists(args_path):
+            shutil.copy(args_path, os.path.join(args.output_dir, 'args.json'))
+        else:
+            args.save_args(args.output_dir)
     logger.info(f'Successfully saved HF model weights in `{args.output_dir}`.')
     if args.test_convert_precision:
         hf_model, template = prepare_model_template(args, model=args.output_dir)
diff --git a/swift/megatron/export/export.py b/swift/megatron/export/export.py
index a1f51ff47f..691e5f1c84 100644
--- a/swift/megatron/export/export.py
+++ b/swift/megatron/export/export.py
@@ -67,6 +67,7 @@ def convert_mcore2hf(self) -> None:
                 shutil.copy(args_path, os.path.join(args.save, 'args.json'))
             else:
                 args.save_args(args.save)
+        logger.info(f'Successfully saved HF model weights in `{args.save}`.')
         if args.test_convert_precision:
             with disable_safe_ddp_context_use_barrier():
                 if save_peft_format:
@@ -114,13 +115,18 @@ def convert_hf2mcore(self) -> None:
             logger.info('Merge LoRA...')
             mg_model = peft_model.merge_and_unload()
         logger.info('Successfully transferred HF model weights to MG model.')
+        # hf_model does not support loading args.adapter_load, so test_convert_precision cannot be performed
+        support_convert_precision = args.adapter_load is None
         if args.test_convert_precision:
-            with disable_safe_ddp_context_use_barrier():
-                device_map = args.device_map or 'auto'
-                hf_model, template = prepare_model_template(
-                    args, device_map=device_map) if is_last_rank() else (None, template)
-                test_convert_precision(hf_model, mg_model, template, args.test_convert_dtype)
-                dist.barrier()
+            if support_convert_precision:
+                with disable_safe_ddp_context_use_barrier():
+                    device_map = args.device_map or 'auto'
+                    hf_model, template = prepare_model_template(
+                        args, device_map=device_map) if is_last_rank() else (None, template)
+                    test_convert_precision(hf_model, mg_model, template, args.test_convert_dtype)
+                    dist.barrier()
+            else:
+                logger.warning('Skip test_convert_precision because `--adapter_load` is specified.')
         args.save_args(args.save)
         logger.info('Saving the model...')
         save_peft_format = args.train_type == 'lora' and not args.merge_lora
diff --git a/swift/megatron/model/gpt_bridge.py b/swift/megatron/model/gpt_bridge.py
index 853193d378..07ae6dc870 100644
--- a/swift/megatron/model/gpt_bridge.py
+++ b/swift/megatron/model/gpt_bridge.py
@@ -1432,6 +1432,8 @@ def save_weights(self, mg_models, output_dir: str, is_peft_format: bool = False)
         if is_peft_format:
             from swift.llm import get_multimodal_target_regex
             peft_config = copy(mg_models[0].peft_config[self._adapter_name])
+            if args.task_type == 'seq_cls':
+                peft_config.task_type = 'SEQ_CLS'
             if args.is_multimodal and 'all-linear' in args.target_modules:
                 peft_config.target_modules = get_multimodal_target_regex(
                     self.hf_model,