Skip to content

Commit a10c2bf

Browse files
Isotr0py and Cyrilvallez
authored and committed
Fix multimodal processor getting duplicate arguments when it receives kwargs for initialization (#39125)
* fix processor tokenizer override
  Signed-off-by: Isotr0py <[email protected]>
* code format
  Signed-off-by: Isotr0py <[email protected]>
* add regression test
  Signed-off-by: Isotr0py <[email protected]>
* fix
  Signed-off-by: Isotr0py <[email protected]>
* check image processor same
  Signed-off-by: Isotr0py <[email protected]>
---------
Signed-off-by: Isotr0py <[email protected]>
1 parent 650bc67 commit a10c2bf

File tree

2 files changed

+19
-3
lines changed

2 files changed

+19
-3
lines changed

src/transformers/processing_utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,9 +1097,13 @@ def from_args_and_dict(cls, args, processor_dict: dict[str, Any], **kwargs):
10971097
processor_config=processor_dict, valid_kwargs=accepted_args_and_kwargs
10981098
)
10991099

1100-
# remove args that are in processor_dict to avoid duplicate arguments
1101-
args_to_remove = [i for i, arg in enumerate(accepted_args_and_kwargs) if arg in processor_dict]
1102-
args = [arg for i, arg in enumerate(args) if i not in args_to_remove]
1100+
# update args that are already in processor_dict to avoid duplicate arguments
1101+
args_to_update = {
1102+
i: valid_kwargs.pop(arg)
1103+
for i, arg in enumerate(accepted_args_and_kwargs)
1104+
if (arg in valid_kwargs and i < len(args))
1105+
}
1106+
args = [arg if i not in args_to_update else args_to_update[i] for i, arg in enumerate(args)]
11031107

11041108
# instantiate processor with used (and valid) kwargs only
11051109
processor = cls(*args, **valid_kwargs)

tests/test_processing_common.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,18 @@ def test_doubly_passed_kwargs(self):
351351
return_tensors="pt",
352352
)
353353

354+
def test_args_overlap_kwargs(self):
355+
if "image_processor" not in self.processor_class.attributes:
356+
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
357+
processor_first = self.get_processor()
358+
image_processor = processor_first.image_processor
359+
image_processor.is_override = True
360+
361+
with tempfile.TemporaryDirectory() as tmpdirname:
362+
processor_first.save_pretrained(tmpdirname)
363+
processor_second = self.processor_class.from_pretrained(tmpdirname, image_processor=image_processor)
364+
self.assertTrue(processor_second.image_processor.is_override)
365+
354366
def test_structured_kwargs_nested(self):
355367
if "image_processor" not in self.processor_class.attributes:
356368
self.skipTest(f"image_processor attribute not present in {self.processor_class}")

0 commit comments

Comments
 (0)