Skip to content

Commit 0de99c0

Browse files
committed
Fixes #12673. record_stream is not working properly
1 parent c8656ed commit 0de99c0

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/diffusers/hooks/group_offloading.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,6 @@ def _pinned_memory_tensors(self):
155 155

156 156
def _transfer_tensor_to_device(self, tensor, source_tensor):
157 157
tensor.data = source_tensor.to(self.onload_device, non_blocking=self.non_blocking)
158-
if self.record_stream:
159-
tensor.data.record_stream(self._torch_accelerator_module.current_stream())
160 158

161 159
def _process_tensors_from_modules(self, pinned_memory=None):
162 160
for group_module in self.modules:
@@ -240,6 +238,8 @@ def _offload_to_memory(self):
240 238

241 239
for group_module in self.modules:
242 240
for param in group_module.parameters():
241+
if self.record_stream and param.device.type == 'cuda':
242+
param.data.record_stream(self._torch_accelerator_module.current_stream())
243 243
param.data = self.cpu_param_dict[param]
244 244
for param in self.parameters:
245 245
param.data = self.cpu_param_dict[param]

0 commit comments

Comments
 (0)