Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
renzhc
diffusers_dcu
Commits
41ba8c0b
Unverified
Commit
41ba8c0b
authored
Dec 20, 2024
by
Aryan
Committed by
GitHub
Dec 19, 2024
Browse files
Add support for sharded models when TorchAO quantization is enabled (#10256)
* add sharded + device_map check
parent
31912484
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
48 additions
and
24 deletions
+48
-24
src/diffusers/models/modeling_utils.py
src/diffusers/models/modeling_utils.py
+1
-1
tests/quantization/torchao/test_torchao.py
tests/quantization/torchao/test_torchao.py
+47
-23
No files found.
src/diffusers/models/modeling_utils.py
View file @
41ba8c0b
...
...
@@ -802,7 +802,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
revision
=
revision
,
subfolder
=
subfolder
or
""
,
)
if
hf_quantizer
is
not
None
:
if
hf_quantizer
is
not
None
and
is_bnb_quantization_method
:
model_file
=
_merge_sharded_checkpoints
(
sharded_ckpt_cached_folder
,
sharded_metadata
)
logger
.
info
(
"Merged sharded checkpoints as `hf_quantizer` is not None."
)
is_sharded
=
False
...
...
tests/quantization/torchao/test_torchao.py
View file @
41ba8c0b
...
...
@@ -278,13 +278,14 @@ class TorchAoTest(unittest.TestCase):
self
.
assertEqual
(
weight
.
quant_max
,
15
)
self
.
assertTrue
(
isinstance
(
weight
.
layout_type
,
TensorCoreTiledLayoutType
))
def
test_
offload
(
self
):
def
test_
device_map
(
self
):
"""
Test if the quantized model int4 weight-only is working properly with cpu/disk offload. Also verifies
that the device map is correctly set (in the `hf_device_map` attribute of the model).
Test if the quantized model int4 weight-only is working properly with "auto" and custom device maps.
The custom device map performs cpu/disk offloading as well. Also verifies that the device map is
correctly set (in the `hf_device_map` attribute of the model).
"""
device_map_
offload
=
{
custom_
device_map_
dict
=
{
"time_text_embed"
:
torch_device
,
"context_embedder"
:
torch_device
,
"x_embedder"
:
torch_device
,
...
...
@@ -293,26 +294,49 @@ class TorchAoTest(unittest.TestCase):
"norm_out"
:
torch_device
,
"proj_out"
:
"cpu"
,
}
device_maps
=
[
"auto"
,
custom_device_map_dict
]
inputs
=
self
.
get_dummy_tensor_inputs
(
torch_device
)
expected_slice
=
np
.
array
([
0.3457
,
-
0.0366
,
0.0105
,
-
0.2275
,
-
0.4941
,
0.4395
,
-
0.166
,
-
0.6641
,
0.4375
])
for
device_map
in
device_maps
:
device_map_to_compare
=
{
""
:
0
}
if
device_map
==
"auto"
else
device_map
# Test non-sharded model
with
tempfile
.
TemporaryDirectory
()
as
offload_folder
:
quantization_config
=
TorchAoConfig
(
"int4_weight_only"
,
group_size
=
64
)
quantized_model
=
FluxTransformer2DModel
.
from_pretrained
(
"hf-internal-testing/tiny-flux-pipe"
,
subfolder
=
"transformer"
,
quantization_config
=
quantization_config
,
device_map
=
device_map
_offload
,
device_map
=
device_map
,
torch_dtype
=
torch
.
bfloat16
,
offload_folder
=
offload_folder
,
)
self
.
assertTrue
(
quantized_model
.
hf_device_map
==
device_map_offload
)
self
.
assertTrue
(
quantized_model
.
hf_device_map
==
device_map_to_compare
)
output
=
quantized_model
(
**
inputs
)[
0
]
output_slice
=
output
.
flatten
()[
-
9
:].
detach
().
float
().
cpu
().
numpy
()
self
.
assertTrue
(
np
.
allclose
(
output_slice
,
expected_slice
,
atol
=
1e-3
,
rtol
=
1e-3
))
# Test sharded model
with
tempfile
.
TemporaryDirectory
()
as
offload_folder
:
quantization_config
=
TorchAoConfig
(
"int4_weight_only"
,
group_size
=
64
)
quantized_model
=
FluxTransformer2DModel
.
from_pretrained
(
"hf-internal-testing/tiny-flux-sharded"
,
subfolder
=
"transformer"
,
quantization_config
=
quantization_config
,
device_map
=
device_map
,
torch_dtype
=
torch
.
bfloat16
,
offload_folder
=
offload_folder
,
)
self
.
assertTrue
(
quantized_model
.
hf_device_map
==
device_map_to_compare
)
output
=
quantized_model
(
**
inputs
)[
0
]
output_slice
=
output
.
flatten
()[
-
9
:].
detach
().
float
().
cpu
().
numpy
()
expected_slice
=
np
.
array
([
0.3457
,
-
0.0366
,
0.0105
,
-
0.2275
,
-
0.4941
,
0.4395
,
-
0.166
,
-
0.6641
,
0.4375
])
self
.
assertTrue
(
np
.
allclose
(
output_slice
,
expected_slice
,
atol
=
1e-3
,
rtol
=
1e-3
))
def
test_modules_to_not_convert
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment