Fix compatibility with pipeline when loading model with device_map on single gpu (#10390)

* fix device issue in single gpu case
* Update src/diffusers/pipelines/pipeline_utils.py

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

---------

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

Fix compatibility with pipeline when loading model with device_map on single gpu (#10390)
* fix device issue in single gpu case
* Update src/diffusers/pipelines/pipeline_utils.py

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

---------

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
e2deb82e · Marc Sun · GitHub · 1288c856 · e2deb82e · e2deb82e
Unverified Commit e2deb82e authored Jan 08, 2025 by Marc Sun Committed by GitHub Jan 08, 2025
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 10 deletions

src/diffusers/models/modeling_utils.py src/diffusers/models/modeling_utils.py +0 -4

src/diffusers/pipelines/pipeline_utils.py src/diffusers/pipelines/pipeline_utils.py +7 -6

No files found.
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -920,14 +920,12 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                else:  # else let accelerate handle loading and dispatching.
                    # Load weights and dispatch according to the device_map
                    # by default the device_map is None and the weights are loaded on the CPU
-                    force_hook = True
                    device_map = _determine_device_map(
                        model, device_map, max_memory, torch_dtype, keep_in_fp32_modules, hf_quantizer
                    )
                    if device_map is None and is_sharded:
                        # we load the parameters on the cpu
                        device_map = {"": "cpu"}
-                        force_hook = False
                    try:
                        accelerate.load_checkpoint_and_dispatch(
                            model,
@@ -937,7 +935,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                            offload_folder=offload_folder,
                            offload_state_dict=offload_state_dict,
                            dtype=torch_dtype,
-                            force_hooks=force_hook,
                            strict=True,
                        )
                    except AttributeError as e:
@@ -967,7 +964,6 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                                offload_folder=offload_folder,
                                offload_state_dict=offload_state_dict,
                                dtype=torch_dtype,
-                                force_hooks=force_hook,
                                strict=True,
                            )
                            model._undo_temp_convert_self_to_deprecated_attention_blocks()

--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -411,6 +411,13 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
        pipeline_is_sequentially_offloaded = any(
            module_is_sequentially_offloaded(module) for _, module in self.components.items()
        )
+
+        is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
+        if is_pipeline_device_mapped:
+            raise ValueError(
+                "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline."
+            )
+
        if device and torch.device(device).type == "cuda":
            if pipeline_is_sequentially_offloaded and not pipeline_has_bnb:
                raise ValueError(
@@ -422,12 +429,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
                    "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation."
                )

-        is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
-        if is_pipeline_device_mapped:
-            raise ValueError(
-                "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` first and then call `to()`."
-            )
-
        # Display a warning in this case (the operation succeeds but the benefits are lost)
        pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items())
        if pipeline_is_offloaded and device and torch.device(device).type == "cuda":