Unverified Commit 91545666 authored by Sayak Paul and committed by GitHub

[tests] model-level `device_map` clarifications (#11681)

* add clarity in documentation for device_map

* docs

* fix how compiler tester mixins are used.

* propagate

* more

* typo.

* fix tests

* fix order of decorators.

* clarify more.

* more test cases.

* fix doc

* fix device_map docstring in pipeline_utils.

* more examples

* more

* update

* remove code for stuff that is already supported.

* fix stuff.
parent b6f79330
@@ -814,14 +814,43 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
      Mirror source to resolve accessibility issues if you're downloading a model in China. We do not
      guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
      information.
- device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
+ device_map (`Union[int, str, torch.device]` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
      A map that specifies where each submodule should go. It doesn't need to be defined for each
      parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the
      same device. Defaults to `None`, meaning that the model will be loaded on CPU.
+
+     Examples:
+
+     ```py
+     >>> from diffusers import AutoModel
+     >>> import torch
+
+     >>> # This works.
+     >>> model = AutoModel.from_pretrained(
+     ...     "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", device_map="cuda"
+     ... )
+     >>> # This also works (integer accelerator device ID).
+     >>> model = AutoModel.from_pretrained(
+     ...     "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", device_map=0
+     ... )
+     >>> # Specifying a supported offloading strategy like "auto" also works.
+     >>> model = AutoModel.from_pretrained(
+     ...     "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", device_map="auto"
+     ... )
+     >>> # Specifying a dictionary as `device_map` also works.
+     >>> model = AutoModel.from_pretrained(
+     ...     "stabilityai/stable-diffusion-xl-base-1.0",
+     ...     subfolder="unet",
+     ...     device_map={"": torch.device("cuda")},
+     ... )
+     ```
+
      Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For
      more information about each option see [designing a device
-     map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
+     map](https://huggingface.co/docs/accelerate/en/concept_guides/big_model_inference#the-devicemap). You
+     can also refer to the [Diffusers-specific
+     documentation](https://huggingface.co/docs/diffusers/main/en/training/distributed_inference#model-sharding)
+     for more concrete examples.
  max_memory (`Dict`, *optional*):
      A dictionary device identifier for the maximum memory. Will default to the maximum memory available for
      each GPU and the available CPU RAM if unset.
@@ -1387,7 +1416,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
  low_cpu_mem_usage: bool = True,
  dtype: Optional[Union[str, torch.dtype]] = None,
  keep_in_fp32_modules: Optional[List[str]] = None,
- device_map: Dict[str, Union[int, str, torch.device]] = None,
+ device_map: Union[str, int, torch.device, Dict[str, Union[int, str, torch.device]]] = None,
  offload_state_dict: Optional[bool] = None,
  offload_folder: Optional[Union[str, os.PathLike]] = None,
  dduf_entries: Optional[Dict[str, DDUFEntry]] = None,
......
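As a companion to the model-level docstring changes above, here is a minimal sketch of how `device_map="auto"` can be combined with the `max_memory` argument documented in the same section. The repository id, memory limits, and dtype below are illustrative assumptions, not values required by this change:

```py
import torch
from diffusers import AutoModel

# Let 🤗 Accelerate compute the placement automatically, capping the first GPU
# at 10 GiB and allowing the remaining weights to spill into CPU RAM.
# Repo id and memory limits are illustrative only.
model = AutoModel.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="unet",
    device_map="auto",
    max_memory={0: "10GiB", "cpu": "32GiB"},
    torch_dtype=torch.float16,
)
```

With `"auto"`, Accelerate decides the submodule-to-device assignment within the `max_memory` budget; the linked Accelerate guide describes the available strategies in more detail.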
@@ -669,14 +669,11 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
      Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not
      guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
      information.
- device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
-     A map that specifies where each submodule should go. It doesn’t need to be defined for each
-     parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the
-     same device.
-
-     Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For
-     more information about each option see [designing a device
-     map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
+ device_map (`str`, *optional*):
+     Strategy that dictates how the different components of a pipeline should be placed on available
+     devices. Currently, only "balanced" `device_map` is supported. Check out
+     [this](https://huggingface.co/docs/diffusers/main/en/tutorials/inference_with_big_models#device-placement)
+     to know more.
  max_memory (`Dict`, *optional*):
      A dictionary device identifier for the maximum memory. Will default to the maximum memory available for
      each GPU and the available CPU RAM if unset.
......
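For the pipeline-level counterpart, the updated docstring states that only the `"balanced"` placement strategy is accepted. A hedged sketch of what that looks like in practice (model id and dtype are illustrative):

```py
import torch
from diffusers import DiffusionPipeline

# "balanced" spreads the pipeline components (text encoders, UNet, VAE, ...)
# across the available accelerators; per the docstring above it is the only
# strategy pipelines currently accept.
pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    device_map="balanced",
    torch_dtype=torch.float16,
)
image = pipeline("a photo of an astronaut riding a horse").images[0]
```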
@@ -46,6 +46,7 @@ from diffusers.utils.testing_utils import (
  require_peft_backend,
  require_torch_accelerator,
  require_torch_accelerator_with_fp16,
+ require_torch_gpu,
  skip_mps,
  slow,
  torch_all_close,
@@ -1083,6 +1084,42 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
  assert loaded_model
  assert new_output.sample.shape == (4, 4, 16, 16)

+ @parameterized.expand(
+     [
+         (-1, "You can't pass device_map as a negative int"),
+         ("foo", "When passing device_map as a string, the value needs to be a device name"),
+     ]
+ )
+ def test_wrong_device_map_raises_error(self, device_map, msg_substring):
+     with self.assertRaises(ValueError) as err_ctx:
+         _ = self.model_class.from_pretrained(
+             "hf-internal-testing/unet2d-sharded-dummy-subfolder", subfolder="unet", device_map=device_map
+         )
+
+     assert msg_substring in str(err_ctx.exception)
+
+ @parameterized.expand([0, "cuda", torch.device("cuda"), torch.device("cuda:0")])
+ @require_torch_gpu
+ def test_passing_non_dict_device_map_works(self, device_map):
+     _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
+     loaded_model = self.model_class.from_pretrained(
+         "hf-internal-testing/unet2d-sharded-dummy-subfolder", subfolder="unet", device_map=device_map
+     )
+     output = loaded_model(**inputs_dict)
+     assert output.sample.shape == (4, 4, 16, 16)
+
+ @parameterized.expand([("", "cuda"), ("", torch.device("cuda"))])
+ @require_torch_gpu
+ def test_passing_dict_device_map_works(self, name, device_map):
+     # There are other valid dict-based `device_map` values too. It's best to refer to
+     # the docs for those: https://huggingface.co/docs/accelerate/en/concept_guides/big_model_inference#the-devicemap.
+     _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
+     loaded_model = self.model_class.from_pretrained(
+         "hf-internal-testing/unet2d-sharded-dummy-subfolder", subfolder="unet", device_map={name: device_map}
+     )
+     output = loaded_model(**inputs_dict)
+     assert output.sample.shape == (4, 4, 16, 16)
+
  @require_peft_backend
  def test_load_attn_procs_raise_warning(self):
      init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
......
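Outside the test suite, the validation behaviour exercised by `test_wrong_device_map_raises_error` can be reproduced directly. A small sketch, reusing the repository id from the docstring examples above (exact error wording may differ between versions):

```py
from diffusers import AutoModel

# A negative integer or an unrecognized device string is expected to be
# rejected with a ValueError, mirroring the messages asserted in the new tests.
for bad_device_map in (-1, "foo"):
    try:
        AutoModel.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", device_map=bad_device_map
        )
    except ValueError as err:
        print(f"{bad_device_map!r} rejected: {err}")
```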