Add ControlNetUnion (#10131)

* ControlNetUnion model

Add ControlNetUnion (#10131)
* ControlNetUnion model
914a585b · hlky · GitHub · ad40e265 · 914a585b · 914a585b
Unverified Commit 914a585b authored Dec 11, 2024 by hlky Committed by GitHub Dec 11, 2024
14 changed files
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -252,6 +252,8 @@
        title: SD3ControlNetModel
      - local: api/models/controlnet_sparsectrl
        title: SparseControlNetModel
+      - local: api/models/controlnet_union
+        title: ControlNetUnionModel
      title: ControlNets
    - sections:
      - local: api/models/allegro_transformer3d
@@ -368,6 +370,8 @@
      title: ControlNet-XS
    - local: api/pipelines/controlnetxs_sdxl
      title: ControlNet-XS with Stable Diffusion XL
+    - local: api/pipelines/controlnet_union
+      title: ControlNetUnion
    - local: api/pipelines/dance_diffusion
      title: Dance Diffusion
    - local: api/pipelines/ddim

--- a/docs/source/en/api/models/controlnet_union.md
+++ b/docs/source/en/api/models/controlnet_union.md
+<!--Copyright 2024 The HuggingFace Team and The InstantX Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# ControlNetUnionModel
+
+ControlNetUnionModel is an implementation of ControlNet for Stable Diffusion XL.
+
+The ControlNet model was introduced in [ControlNetPlus](https://github.com/xinsir6/ControlNetPlus) by xinsir6. It supports multiple conditioning inputs without increasing computation.
+
+*We design a new architecture that can support 10+ control types in condition text-to-image generation and can generate high resolution images visually comparable with midjourney. The network is based on the original ControlNet architecture, we propose two new modules to: 1 Extend the original ControlNet to support different image conditions using the same network parameter. 2 Support multiple conditions input without increasing computation offload, which is especially important for designers who want to edit image in detail, different conditions use the same condition encoder, without adding extra computations or parameters.*
+
+## Loading
+
+By default the [`ControlNetUnionModel`] should be loaded with [`~ModelMixin.from_pretrained`].
+
+```py
+from diffusers import StableDiffusionXLControlNetUnionPipeline, ControlNetUnionModel
+
+controlnet = ControlNetUnionModel.from_pretrained("xinsir/controlnet-union-sdxl-1.0")
+pipe = StableDiffusionXLControlNetUnionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet)
+```
+
+## ControlNetUnionModel
+
+[[autodoc]] ControlNetUnionModel
+
--- a/docs/source/en/api/pipelines/controlnet_union.md
+++ b/docs/source/en/api/pipelines/controlnet_union.md
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# ControlNetUnion
+
+ControlNetUnionModel is an implementation of ControlNet for Stable Diffusion XL.
+
+The ControlNet model was introduced in [ControlNetPlus](https://github.com/xinsir6/ControlNetPlus) by xinsir6. It supports multiple conditioning inputs without increasing computation.
+
+*We design a new architecture that can support 10+ control types in condition text-to-image generation and can generate high resolution images visually comparable with midjourney. The network is based on the original ControlNet architecture, we propose two new modules to: 1 Extend the original ControlNet to support different image conditions using the same network parameter. 2 Support multiple conditions input without increasing computation offload, which is especially important for designers who want to edit image in detail, different conditions use the same condition encoder, without adding extra computations or parameters.*
+
+
+## StableDiffusionXLControlNetUnionPipeline
+[[autodoc]] StableDiffusionXLControlNetUnionPipeline
+	- all
+	- __call__
+
+## StableDiffusionXLControlNetUnionImg2ImgPipeline
+[[autodoc]] StableDiffusionXLControlNetUnionImg2ImgPipeline
+	- all
+	- __call__
+
+## StableDiffusionXLControlNetUnionInpaintPipeline
+[[autodoc]] StableDiffusionXLControlNetUnionInpaintPipeline
+	- all
+	- __call__
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -92,6 +92,7 @@ else:
            "CogView3PlusTransformer2DModel",
            "ConsistencyDecoderVAE",
            "ControlNetModel",
+            "ControlNetUnionModel",
            "ControlNetXSAdapter",
            "DiTTransformer2DModel",
            "FluxControlNetModel",
@@ -378,6 +379,9 @@ else:
            "StableDiffusionXLControlNetPAGImg2ImgPipeline",
            "StableDiffusionXLControlNetPAGPipeline",
            "StableDiffusionXLControlNetPipeline",
+            "StableDiffusionXLControlNetUnionImg2ImgPipeline",
+            "StableDiffusionXLControlNetUnionInpaintPipeline",
+            "StableDiffusionXLControlNetUnionPipeline",
            "StableDiffusionXLControlNetXSPipeline",
            "StableDiffusionXLImg2ImgPipeline",
            "StableDiffusionXLInpaintPipeline",
@@ -586,6 +590,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            CogView3PlusTransformer2DModel,
            ConsistencyDecoderVAE,
            ControlNetModel,
+            ControlNetUnionModel,
            ControlNetXSAdapter,
            DiTTransformer2DModel,
            FluxControlNetModel,
@@ -850,6 +855,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            StableDiffusionXLControlNetPAGImg2ImgPipeline,
            StableDiffusionXLControlNetPAGPipeline,
            StableDiffusionXLControlNetPipeline,
+            StableDiffusionXLControlNetUnionImg2ImgPipeline,
+            StableDiffusionXLControlNetUnionInpaintPipeline,
+            StableDiffusionXLControlNetUnionPipeline,
            StableDiffusionXLControlNetXSPipeline,
            StableDiffusionXLImg2ImgPipeline,
            StableDiffusionXLInpaintPipeline,

--- a/src/diffusers/models/__init__.py
+++ b/src/diffusers/models/__init__.py
@@ -45,6 +45,7 @@ if is_torch_available():
    ]
    _import_structure["controlnets.controlnet_sd3"] = ["SD3ControlNetModel", "SD3MultiControlNetModel"]
    _import_structure["controlnets.controlnet_sparsectrl"] = ["SparseControlNetModel"]
+    _import_structure["controlnets.controlnet_union"] = ["ControlNetUnionModel"]
    _import_structure["controlnets.controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
    _import_structure["controlnets.multicontrolnet"] = ["MultiControlNetModel"]
    _import_structure["embeddings"] = ["ImageProjection"]
@@ -102,6 +103,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
        )
        from .controlnets import (
            ControlNetModel,
+            ControlNetUnionModel,
            ControlNetXSAdapter,
            FluxControlNetModel,
            FluxMultiControlNetModel,

--- a/src/diffusers/models/controlnets/__init__.py
+++ b/src/diffusers/models/controlnets/__init__.py
@@ -15,6 +15,7 @@ if is_torch_available():
        SparseControlNetModel,
        SparseControlNetOutput,
    )
+    from .controlnet_union import ControlNetUnionInput, ControlNetUnionInputProMax, ControlNetUnionModel
    from .controlnet_xs import ControlNetXSAdapter, ControlNetXSOutput, UNetControlNetXSModel
    from .multicontrolnet import MultiControlNetModel


--- a/src/diffusers/models/controlnets/controlnet_union.py
+++ b/src/diffusers/models/controlnets/controlnet_union.py
--- a/src/diffusers/pipelines/__init__.py
+++ b/src/diffusers/pipelines/__init__.py
@@ -162,6 +162,9 @@ else:
            "StableDiffusionXLControlNetImg2ImgPipeline",
            "StableDiffusionXLControlNetInpaintPipeline",
            "StableDiffusionXLControlNetPipeline",
+            "StableDiffusionXLControlNetUnionPipeline",
+            "StableDiffusionXLControlNetUnionInpaintPipeline",
+            "StableDiffusionXLControlNetUnionImg2ImgPipeline",
        ]
    )
    _import_structure["pag"].extend(
@@ -496,6 +499,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
            StableDiffusionXLControlNetImg2ImgPipeline,
            StableDiffusionXLControlNetInpaintPipeline,
            StableDiffusionXLControlNetPipeline,
+            StableDiffusionXLControlNetUnionImg2ImgPipeline,
+            StableDiffusionXLControlNetUnionInpaintPipeline,
+            StableDiffusionXLControlNetUnionPipeline,
        )
        from .controlnet_hunyuandit import (
            HunyuanDiTControlNetPipeline,

--- a/src/diffusers/pipelines/controlnet/__init__.py
+++ b/src/diffusers/pipelines/controlnet/__init__.py
@@ -30,6 +30,9 @@ else:
    _import_structure["pipeline_controlnet_inpaint_sd_xl"] = ["StableDiffusionXLControlNetInpaintPipeline"]
    _import_structure["pipeline_controlnet_sd_xl"] = ["StableDiffusionXLControlNetPipeline"]
    _import_structure["pipeline_controlnet_sd_xl_img2img"] = ["StableDiffusionXLControlNetImg2ImgPipeline"]
+    _import_structure["pipeline_controlnet_union_inpaint_sd_xl"] = ["StableDiffusionXLControlNetUnionInpaintPipeline"]
+    _import_structure["pipeline_controlnet_union_sd_xl"] = ["StableDiffusionXLControlNetUnionPipeline"]
+    _import_structure["pipeline_controlnet_union_sd_xl_img2img"] = ["StableDiffusionXLControlNetUnionImg2ImgPipeline"]
 try:
    if not (is_transformers_available() and is_flax_available()):
        raise OptionalDependencyNotAvailable()
@@ -57,6 +60,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
        from .pipeline_controlnet_inpaint_sd_xl import StableDiffusionXLControlNetInpaintPipeline
        from .pipeline_controlnet_sd_xl import StableDiffusionXLControlNetPipeline
        from .pipeline_controlnet_sd_xl_img2img import StableDiffusionXLControlNetImg2ImgPipeline
+        from .pipeline_controlnet_union_inpaint_sd_xl import StableDiffusionXLControlNetUnionInpaintPipeline
+        from .pipeline_controlnet_union_sd_xl import StableDiffusionXLControlNetUnionPipeline
+        from .pipeline_controlnet_union_sd_xl_img2img import StableDiffusionXLControlNetUnionImg2ImgPipeline

    try:
        if not (is_transformers_available() and is_flax_available()):

--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py
--- a/src/diffusers/utils/dummy_pt_objects.py
+++ b/src/diffusers/utils/dummy_pt_objects.py
@@ -227,6 +227,21 @@ class ControlNetModel(metaclass=DummyObject):
        requires_backends(cls, ["torch"])


+class ControlNetUnionModel(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+
 class ControlNetXSAdapter(metaclass=DummyObject):
    _backends = ["torch"]


--- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py
+++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py
@@ -1982,6 +1982,51 @@ class StableDiffusionXLControlNetPipeline(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers"])


+class StableDiffusionXLControlNetUnionImg2ImgPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
+class StableDiffusionXLControlNetUnionInpaintPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
+class StableDiffusionXLControlNetUnionPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
 class StableDiffusionXLControlNetXSPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]