From 285deda47558272a102d19d08aba9fa20954b379 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 15 Aug 2025 13:33:17 -0400
Subject: [PATCH] remove FSDP prefix when using save_pretrained with FSDP2

---
 src/transformers/modeling_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index fa3fb3d176ac..e0c5ce6dc9a8 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3934,7 +3934,8 @@ def save_pretrained(
         model_to_save.config.torch_dtype = str(dtype).split(".")[1]
 
         # Attach architecture to the config
-        model_to_save.config.architectures = [model_to_save.__class__.__name__]
+        # When using FSDP2, unwrapping is a noop, so the model name doesn't change back to the original model name.
+        # Use removeprefix (not lstrip, which strips a *character set* and would mangle names like "DistilBertModel").
+        model_to_save.config.architectures = [model_to_save.__class__.__name__.removeprefix("FSDP")]
 
         # If we have a custom model, we copy the file defining it in the folder and set the attributes so it can be
         # loaded from the Hub.