renzhc / diffusers_dcu · Commits

Commit 32ff4773 (unverified)
ControlNetXS fixes. (#6228)

update

Authored Dec 19, 2023 by Dhruv Nair; committed via GitHub, Dec 19, 2023.
Parent: 288ceebe
Changes: 2 changed files, with 67 additions and 23 deletions.

src/diffusers/models/controlnetxs.py  (+60, -21)
tests/pipelines/controlnetxs/test_controlnetxs.py  (+7, -2)
src/diffusers/models/controlnetxs.py
@@ -23,9 +23,7 @@ from torch.nn.modules.normalization import GroupNorm
 from ..configuration_utils import ConfigMixin, register_to_config
 from ..utils import BaseOutput, logging
-from .attention_processor import (
-    AttentionProcessor,
-)
+from .attention_processor import USE_PEFT_BACKEND, AttentionProcessor
 from .autoencoders import AutoencoderKL
 from .lora import LoRACompatibleConv
 from .modeling_utils import ModelMixin
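
Note on the import change above: USE_PEFT_BACKEND is a module-level boolean that diffusers sets at import time, depending on whether a suitable peft installation is available to handle LoRA; the rest of this commit branches on it. A minimal sketch of checking the flag (illustrative, not part of the commit):

# Illustrative sketch: the flag is a plain boolean constant, also exported
# from diffusers.utils, that the code in this commit uses to pick layer classes.
from diffusers.utils import USE_PEFT_BACKEND

if USE_PEFT_BACKEND:
    print("peft handles LoRA; plain torch.nn layers are used")
else:
    print("no peft backend; diffusers' LoRACompatible* wrappers handle LoRA")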
@@ -817,11 +815,23 @@ def increase_block_input_in_encoder_resnet(unet: UNet2DConditionModel, block_no,
     norm_kwargs = {a: getattr(old_norm1, a) for a in norm_args}
     norm_kwargs["num_channels"] += by  # surgery done here
     # conv1
-    conv1_args = (
-        "in_channels out_channels kernel_size stride padding dilation groups bias padding_mode lora_layer".split(" ")
-    )
+    conv1_args = [
+        "in_channels",
+        "out_channels",
+        "kernel_size",
+        "stride",
+        "padding",
+        "dilation",
+        "groups",
+        "bias",
+        "padding_mode",
+    ]
+    if not USE_PEFT_BACKEND:
+        conv1_args.append("lora_layer")
     for a in conv1_args:
         assert hasattr(old_conv1, a)
     conv1_kwargs = {a: getattr(old_conv1, a) for a in conv1_args}
     conv1_kwargs["bias"] = "bias" in conv1_kwargs  # as param, bias is a boolean, but as attr, it's a tensor.
     conv1_kwargs["in_channels"] += by  # surgery done here
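
Why the argument list is now built conditionally: the lora_layer constructor argument exists only on diffusers' LoRACompatibleConv, not on torch's nn.Conv2d, so copying it unconditionally fails once the PEFT backend replaces the wrapper layers with plain convolutions. A self-contained sketch of the getattr-based "surgery" pattern used here, with illustrative channel sizes (not part of the commit):

# Clone a conv's constructor arguments off the live module, widen the input
# channels, and build a replacement layer.
import torch.nn as nn

old_conv = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, padding=1)
args = ["in_channels", "out_channels", "kernel_size", "stride", "padding", "dilation", "groups", "padding_mode"]
kwargs = {a: getattr(old_conv, a) for a in args}
kwargs["bias"] = old_conv.bias is not None  # as a param, bias is a boolean; as an attr, it's a tensor
kwargs["in_channels"] += 2  # surgery: make room for features concatenated from the base model
new_conv = nn.Conv2d(**kwargs)
print(new_conv)  # Conv2d(6, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))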
@@ -839,25 +849,42 @@ def increase_block_input_in_encoder_resnet(unet: UNet2DConditionModel, block_no,
     }
     # swap old with new modules
     unet.down_blocks[block_no].resnets[resnet_idx].norm1 = GroupNorm(**norm_kwargs)
-    unet.down_blocks[block_no].resnets[resnet_idx].conv1 = LoRACompatibleConv(**conv1_kwargs)
-    unet.down_blocks[block_no].resnets[resnet_idx].conv_shortcut = LoRACompatibleConv(**conv_shortcut_args_kwargs)
+    unet.down_blocks[block_no].resnets[resnet_idx].conv1 = (
+        nn.Conv2d(**conv1_kwargs) if USE_PEFT_BACKEND else LoRACompatibleConv(**conv1_kwargs)
+    )
+    unet.down_blocks[block_no].resnets[resnet_idx].conv_shortcut = (
+        nn.Conv2d(**conv_shortcut_args_kwargs) if USE_PEFT_BACKEND else LoRACompatibleConv(**conv_shortcut_args_kwargs)
+    )
     unet.down_blocks[block_no].resnets[resnet_idx].in_channels += by  # surgery done here


 def increase_block_input_in_encoder_downsampler(unet: UNet2DConditionModel, block_no, by):
     """Increase channels sizes to allow for additional concatted information from base model"""
     old_down = unet.down_blocks[block_no].downsamplers[0].conv
-    # conv1
-    args = "in_channels out_channels kernel_size stride padding dilation groups bias padding_mode lora_layer".split(
-        " "
-    )
+    args = [
+        "in_channels",
+        "out_channels",
+        "kernel_size",
+        "stride",
+        "padding",
+        "dilation",
+        "groups",
+        "bias",
+        "padding_mode",
+    ]
+    if not USE_PEFT_BACKEND:
+        args.append("lora_layer")
     for a in args:
         assert hasattr(old_down, a)
     kwargs = {a: getattr(old_down, a) for a in args}
     kwargs["bias"] = "bias" in kwargs  # as param, bias is a boolean, but as attr, it's a tensor.
     kwargs["in_channels"] += by  # surgery done here
     # swap old with new modules
-    unet.down_blocks[block_no].downsamplers[0].conv = LoRACompatibleConv(**kwargs)
+    unet.down_blocks[block_no].downsamplers[0].conv = (
+        nn.Conv2d(**kwargs) if USE_PEFT_BACKEND else LoRACompatibleConv(**kwargs)
+    )
     unet.down_blocks[block_no].downsamplers[0].channels += by  # surgery done here
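
The swap now matches the active backend: with peft installed, the resnets and downsamplers get plain nn.Conv2d modules, otherwise they keep the LoRA-aware wrapper. A hedged sketch of the selection pattern (import paths assumed from this era of diffusers; not part of the commit):

import torch.nn as nn
from diffusers.models.lora import LoRACompatibleConv
from diffusers.utils import USE_PEFT_BACKEND

kwargs = dict(in_channels=6, out_channels=8, kernel_size=3, padding=1)
# Same conditional as in the diff: plain conv when peft manages LoRA,
# wrapper conv otherwise.
conv = nn.Conv2d(**kwargs) if USE_PEFT_BACKEND else LoRACompatibleConv(**kwargs)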
@@ -871,12 +898,20 @@ def increase_block_input_in_mid_resnet(unet: UNet2DConditionModel, by):
         assert hasattr(old_norm1, a)
     norm_kwargs = {a: getattr(old_norm1, a) for a in norm_args}
     norm_kwargs["num_channels"] += by  # surgery done here
-    # conv1
-    conv1_args = (
-        "in_channels out_channels kernel_size stride padding dilation groups bias padding_mode lora_layer".split(" ")
-    )
-    for a in conv1_args:
-        assert hasattr(old_conv1, a)
+    conv1_args = [
+        "in_channels",
+        "out_channels",
+        "kernel_size",
+        "stride",
+        "padding",
+        "dilation",
+        "groups",
+        "bias",
+        "padding_mode",
+    ]
+    if not USE_PEFT_BACKEND:
+        conv1_args.append("lora_layer")
     conv1_kwargs = {a: getattr(old_conv1, a) for a in conv1_args}
     conv1_kwargs["bias"] = "bias" in conv1_kwargs  # as param, bias is a boolean, but as attr, it's a tensor.
     conv1_kwargs["in_channels"] += by  # surgery done here
@@ -894,8 +929,12 @@ def increase_block_input_in_mid_resnet(unet: UNet2DConditionModel, by):
     }
     # swap old with new modules
     unet.mid_block.resnets[0].norm1 = GroupNorm(**norm_kwargs)
-    unet.mid_block.resnets[0].conv1 = LoRACompatibleConv(**conv1_kwargs)
-    unet.mid_block.resnets[0].conv_shortcut = LoRACompatibleConv(**conv_shortcut_args_kwargs)
+    unet.mid_block.resnets[0].conv1 = (
+        nn.Conv2d(**conv1_kwargs) if USE_PEFT_BACKEND else LoRACompatibleConv(**conv1_kwargs)
+    )
+    unet.mid_block.resnets[0].conv_shortcut = (
+        nn.Conv2d(**conv_shortcut_args_kwargs) if USE_PEFT_BACKEND else LoRACompatibleConv(**conv_shortcut_args_kwargs)
+    )
     unet.mid_block.resnets[0].in_channels += by  # surgery done here
tests/pipelines/controlnetxs/test_controlnetxs.py
@@ -34,6 +34,7 @@ from diffusers.utils.testing_utils import (
     enable_full_determinism,
     load_image,
     load_numpy,
+    numpy_cosine_similarity_distance,
     require_python39_or_higher,
     require_torch_2,
     require_torch_gpu,
@@ -273,7 +274,9 @@ class ControlNetXSPipelineSlowTests(unittest.TestCase):
         original_image = image[-3:, -3:, -1].flatten()
         expected_image = np.array([0.1274, 0.1401, 0.147, 0.1185, 0.1555, 0.1492, 0.1565, 0.1474, 0.1701])
-        assert np.allclose(original_image, expected_image, atol=1e-04)
+        max_diff = numpy_cosine_similarity_distance(original_image, expected_image)
+        assert max_diff < 1e-4

     def test_depth(self):
         controlnet = ControlNetXSModel.from_pretrained("UmerHA/ConrolNetXS-SD2.1-depth")
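
The slow tests now score the image slice by cosine distance instead of element-wise np.allclose, which tolerates small per-pixel drift across GPUs while still catching real regressions. A sketch of what such a helper computes, assuming the standard 1 - cosine-similarity definition (the body below is an assumption for illustration, not the library source):

import numpy as np

def cosine_similarity_distance(a: np.ndarray, b: np.ndarray) -> float:
    # 0.0 for parallel vectors, growing as the two flattened slices diverge
    similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return float(1.0 - similarity)

a = np.array([0.1274, 0.1401, 0.147])
b = a * 1.001  # a uniform rescale keeps the distance near zero
assert cosine_similarity_distance(a, b) < 1e-4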
@@ -298,7 +301,9 @@ class ControlNetXSPipelineSlowTests(unittest.TestCase):
         original_image = image[-3:, -3:, -1].flatten()
         expected_image = np.array([0.1098, 0.1025, 0.1211, 0.1129, 0.1165, 0.1262, 0.1185, 0.1261, 0.1703])
-        assert np.allclose(original_image, expected_image, atol=1e-04)
+        max_diff = numpy_cosine_similarity_distance(original_image, expected_image)
+        assert max_diff < 1e-4

     @require_python39_or_higher
     @require_torch_2