"git@developer.sourcefind.cn:change/sglang.git" did not exist on "e0b2d3eebebd3d4efc7e323ad2dee605b607f394"
Unverified Commit 561ab54d authored by Frank (Haofan) Wang, committed by GitHub

Support ControlNet for Qwen-Image (#12215)



* support qwen-image-cn-union


---------
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: YiYi Xu <yixu310@gmail.com>
parent b60faf45
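For context, here is a minimal usage sketch of the new pipeline. The checkpoint IDs (`Qwen/Qwen-Image`, `InstantX/Qwen-Image-ControlNet-Union`), the `control_image` argument, and `controlnet_conditioning_scale` are assumptions based on how other diffusers ControlNet pipelines are called; they are not taken from this diff.

```python
import torch
from diffusers import QwenImageControlNetModel, QwenImageControlNetPipeline
from diffusers.utils import load_image

# Hypothetical checkpoint IDs; substitute the ones you actually use.
controlnet = QwenImageControlNetModel.from_pretrained(
    "InstantX/Qwen-Image-ControlNet-Union", torch_dtype=torch.bfloat16
)
pipe = QwenImageControlNetPipeline.from_pretrained(
    "Qwen/Qwen-Image", controlnet=controlnet, torch_dtype=torch.bfloat16
).to("cuda")

# A conditioning image (e.g. canny edges or a depth map); the path is a placeholder.
control_image = load_image("control.png")

image = pipe(
    prompt="a photo of a cat",
    control_image=control_image,
    controlnet_conditioning_scale=1.0,
    num_inference_steps=30,
).images[0]
image.save("qwen_image_controlnet.png")
```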
...@@ -120,6 +120,10 @@ The `guidance_scale` parameter in the pipeline is there to support future guidan
- all
- __call__
## QwenImageControlNetPipeline
- all
- __call__
## QwenImagePipelineOutput
[[autodoc]] pipelines.qwenimage.pipeline_output.QwenImagePipelineOutput
\ No newline at end of file
...@@ -218,6 +218,8 @@ else:
"OmniGenTransformer2DModel",
"PixArtTransformer2DModel",
"PriorTransformer",
"QwenImageControlNetModel",
"QwenImageMultiControlNetModel",
"QwenImageTransformer2DModel", "QwenImageTransformer2DModel",
"SanaControlNetModel", "SanaControlNetModel",
"SanaTransformer2DModel", "SanaTransformer2DModel",
...@@ -491,6 +493,7 @@ else: ...@@ -491,6 +493,7 @@ else:
"PixArtAlphaPipeline", "PixArtAlphaPipeline",
"PixArtSigmaPAGPipeline", "PixArtSigmaPAGPipeline",
"PixArtSigmaPipeline", "PixArtSigmaPipeline",
"QwenImageControlNetPipeline",
"QwenImageEditPipeline", "QwenImageEditPipeline",
"QwenImageImg2ImgPipeline", "QwenImageImg2ImgPipeline",
"QwenImageInpaintPipeline", "QwenImageInpaintPipeline",
...@@ -885,6 +888,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: ...@@ -885,6 +888,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
OmniGenTransformer2DModel, OmniGenTransformer2DModel,
PixArtTransformer2DModel, PixArtTransformer2DModel,
PriorTransformer, PriorTransformer,
QwenImageControlNetModel,
QwenImageMultiControlNetModel,
QwenImageTransformer2DModel,
SanaControlNetModel,
SanaTransformer2DModel,
...@@ -1128,6 +1133,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
PixArtAlphaPipeline,
PixArtSigmaPAGPipeline,
PixArtSigmaPipeline,
QwenImageControlNetPipeline,
QwenImageEditPipeline,
QwenImageImg2ImgPipeline,
QwenImageInpaintPipeline,
...
...@@ -52,6 +52,10 @@ if is_torch_available():
"HunyuanDiT2DControlNetModel",
"HunyuanDiT2DMultiControlNetModel",
]
_import_structure["controlnets.controlnet_qwenimage"] = [
"QwenImageControlNetModel",
"QwenImageMultiControlNetModel",
]
_import_structure["controlnets.controlnet_sana"] = ["SanaControlNetModel"] _import_structure["controlnets.controlnet_sana"] = ["SanaControlNetModel"]
_import_structure["controlnets.controlnet_sd3"] = ["SD3ControlNetModel", "SD3MultiControlNetModel"] _import_structure["controlnets.controlnet_sd3"] = ["SD3ControlNetModel", "SD3MultiControlNetModel"]
_import_structure["controlnets.controlnet_sparsectrl"] = ["SparseControlNetModel"] _import_structure["controlnets.controlnet_sparsectrl"] = ["SparseControlNetModel"]
...@@ -148,6 +152,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: ...@@ -148,6 +152,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
HunyuanDiT2DMultiControlNetModel, HunyuanDiT2DMultiControlNetModel,
MultiControlNetModel, MultiControlNetModel,
MultiControlNetUnionModel, MultiControlNetUnionModel,
QwenImageControlNetModel,
QwenImageMultiControlNetModel,
SanaControlNetModel,
SD3ControlNetModel,
SD3MultiControlNetModel,
...
...@@ -9,6 +9,7 @@ if is_torch_available():
HunyuanDiT2DControlNetModel,
HunyuanDiT2DMultiControlNetModel,
)
from .controlnet_qwenimage import QwenImageControlNetModel, QwenImageMultiControlNetModel
from .controlnet_sana import SanaControlNetModel
from .controlnet_sd3 import SD3ControlNetModel, SD3ControlNetOutput, SD3MultiControlNetModel
from .controlnet_sparsectrl import (
...
# Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Union
import torch
import torch.nn as nn
from ...configuration_utils import ConfigMixin, register_to_config
from ...loaders import FromOriginalModelMixin, PeftAdapterMixin
from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers
from ..attention_processor import AttentionProcessor
from ..cache_utils import CacheMixin
from ..controlnets.controlnet import zero_module
from ..modeling_outputs import Transformer2DModelOutput
from ..modeling_utils import ModelMixin
from ..transformers.transformer_qwenimage import (
QwenEmbedRope,
QwenImageTransformerBlock,
QwenTimestepProjEmbeddings,
RMSNorm,
)
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
@dataclass
class QwenImageControlNetOutput(BaseOutput):
controlnet_block_samples: Tuple[torch.Tensor]
class QwenImageControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin):
_supports_gradient_checkpointing = True
@register_to_config
def __init__(
self,
patch_size: int = 2,
in_channels: int = 64,
out_channels: Optional[int] = 16,
num_layers: int = 60,
attention_head_dim: int = 128,
num_attention_heads: int = 24,
joint_attention_dim: int = 3584,
axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
extra_condition_channels: int = 0, # for controlnet-inpainting
):
super().__init__()
self.out_channels = out_channels or in_channels
self.inner_dim = num_attention_heads * attention_head_dim
self.pos_embed = QwenEmbedRope(theta=10000, axes_dim=list(axes_dims_rope), scale_rope=True)
self.time_text_embed = QwenTimestepProjEmbeddings(embedding_dim=self.inner_dim)
self.txt_norm = RMSNorm(joint_attention_dim, eps=1e-6)
self.img_in = nn.Linear(in_channels, self.inner_dim)
self.txt_in = nn.Linear(joint_attention_dim, self.inner_dim)
self.transformer_blocks = nn.ModuleList(
[
QwenImageTransformerBlock(
dim=self.inner_dim,
num_attention_heads=num_attention_heads,
attention_head_dim=attention_head_dim,
)
for _ in range(num_layers)
]
)
# controlnet_blocks
self.controlnet_blocks = nn.ModuleList([])
for _ in range(len(self.transformer_blocks)):
self.controlnet_blocks.append(zero_module(nn.Linear(self.inner_dim, self.inner_dim)))
self.controlnet_x_embedder = zero_module(
torch.nn.Linear(in_channels + extra_condition_channels, self.inner_dim)
)
self.gradient_checkpointing = False
@property
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.attn_processors
def attn_processors(self):
r"""
Returns:
`dict` of attention processors: A dictionary containing all attention processors used in the model with
indexed by its weight name.
"""
# set recursively
processors = {}
def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
if hasattr(module, "get_processor"):
processors[f"{name}.processor"] = module.get_processor()
for sub_name, child in module.named_children():
fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
return processors
for name, module in self.named_children():
fn_recursive_add_processors(name, module, processors)
return processors
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attn_processor
def set_attn_processor(self, processor):
r"""
Sets the attention processor to use to compute attention.
Parameters:
processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
The instantiated processor class or a dictionary of processor classes that will be set as the processor
for **all** `Attention` layers.
If `processor` is a dict, the key needs to define the path to the corresponding cross attention
processor. This is strongly recommended when setting trainable attention processors.
"""
count = len(self.attn_processors.keys())
if isinstance(processor, dict) and len(processor) != count:
raise ValueError(
f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
)
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
else:
module.set_processor(processor.pop(f"{name}.processor"))
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
for name, module in self.named_children():
fn_recursive_attn_processor(name, module, processor)
@classmethod
def from_transformer(
cls,
transformer,
num_layers: int = 5,
attention_head_dim: int = 128,
num_attention_heads: int = 24,
load_weights_from_transformer=True,
extra_condition_channels: int = 0,
):
config = dict(transformer.config)
config["num_layers"] = num_layers
config["attention_head_dim"] = attention_head_dim
config["num_attention_heads"] = num_attention_heads
config["extra_condition_channels"] = extra_condition_channels
controlnet = cls.from_config(config)
if load_weights_from_transformer:
controlnet.pos_embed.load_state_dict(transformer.pos_embed.state_dict())
controlnet.time_text_embed.load_state_dict(transformer.time_text_embed.state_dict())
controlnet.img_in.load_state_dict(transformer.img_in.state_dict())
controlnet.txt_in.load_state_dict(transformer.txt_in.state_dict())
controlnet.transformer_blocks.load_state_dict(transformer.transformer_blocks.state_dict(), strict=False)
controlnet.controlnet_x_embedder = zero_module(controlnet.controlnet_x_embedder)
return controlnet
def forward(
self,
hidden_states: torch.Tensor,
controlnet_cond: torch.Tensor,
conditioning_scale: float = 1.0,
encoder_hidden_states: torch.Tensor = None,
encoder_hidden_states_mask: torch.Tensor = None,
timestep: torch.LongTensor = None,
img_shapes: Optional[List[Tuple[int, int, int]]] = None,
txt_seq_lens: Optional[List[int]] = None,
joint_attention_kwargs: Optional[Dict[str, Any]] = None,
return_dict: bool = True,
) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
"""
The [`QwenImageControlNetModel`] forward method.
Args:
hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`):
Input `hidden_states`.
controlnet_cond (`torch.Tensor`):
The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`.
conditioning_scale (`float`, defaults to `1.0`):
The scale factor for ControlNet outputs.
encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`):
Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected
from the embeddings of input conditions.
timestep ( `torch.LongTensor`):
Used to indicate denoising step.
block_controlnet_hidden_states: (`list` of `torch.Tensor`):
A list of tensors that if specified are added to the residuals of transformer blocks.
joint_attention_kwargs (`dict`, *optional*):
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
`self.processor` in
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
return_dict (`bool`, *optional*, defaults to `True`):
Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
tuple.
Returns:
If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
`tuple` where the first element is the sample tensor.
"""
if joint_attention_kwargs is not None:
joint_attention_kwargs = joint_attention_kwargs.copy()
lora_scale = joint_attention_kwargs.pop("scale", 1.0)
else:
lora_scale = 1.0
if USE_PEFT_BACKEND:
# weight the lora layers by setting `lora_scale` for each PEFT layer
scale_lora_layers(self, lora_scale)
else:
if joint_attention_kwargs is not None and joint_attention_kwargs.get("scale", None) is not None:
logger.warning(
"Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
)
hidden_states = self.img_in(hidden_states)
# add
hidden_states = hidden_states + self.controlnet_x_embedder(controlnet_cond)
temb = self.time_text_embed(timestep, hidden_states)
image_rotary_emb = self.pos_embed(img_shapes, txt_seq_lens, device=hidden_states.device)
timestep = timestep.to(hidden_states.dtype)
encoder_hidden_states = self.txt_norm(encoder_hidden_states)
encoder_hidden_states = self.txt_in(encoder_hidden_states)
block_samples = ()
for index_block, block in enumerate(self.transformer_blocks):
if torch.is_grad_enabled() and self.gradient_checkpointing:
encoder_hidden_states, hidden_states = self._gradient_checkpointing_func(
block,
hidden_states,
encoder_hidden_states,
encoder_hidden_states_mask,
temb,
image_rotary_emb,
)
else:
encoder_hidden_states, hidden_states = block(
hidden_states=hidden_states,
encoder_hidden_states=encoder_hidden_states,
encoder_hidden_states_mask=encoder_hidden_states_mask,
temb=temb,
image_rotary_emb=image_rotary_emb,
joint_attention_kwargs=joint_attention_kwargs,
)
block_samples = block_samples + (hidden_states,)
# controlnet block
controlnet_block_samples = ()
for block_sample, controlnet_block in zip(block_samples, self.controlnet_blocks):
block_sample = controlnet_block(block_sample)
controlnet_block_samples = controlnet_block_samples + (block_sample,)
# scaling
controlnet_block_samples = [sample * conditioning_scale for sample in controlnet_block_samples]
controlnet_block_samples = None if len(controlnet_block_samples) == 0 else controlnet_block_samples
if USE_PEFT_BACKEND:
# remove `lora_scale` from each PEFT layer
unscale_lora_layers(self, lora_scale)
if not return_dict:
return controlnet_block_samples
return QwenImageControlNetOutput(
controlnet_block_samples=controlnet_block_samples,
)
class QwenImageMultiControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin):
r"""
`QwenImageMultiControlNetModel` wrapper class for Multi-QwenImageControlNetModel
This module is a wrapper for multiple instances of the `QwenImageControlNetModel`. The `forward()` API is designed
to be compatible with `QwenImageControlNetModel`.
Args:
controlnets (`List[QwenImageControlNetModel]`):
Provides additional conditioning to the transformer during the denoising process. You must pass multiple
`QwenImageControlNetModel` instances as a list.
"""
def __init__(self, controlnets):
super().__init__()
self.nets = nn.ModuleList(controlnets)
def forward(
self,
hidden_states: torch.FloatTensor,
controlnet_cond: List[torch.Tensor],
conditioning_scale: List[float],
encoder_hidden_states: torch.Tensor = None,
encoder_hidden_states_mask: torch.Tensor = None,
timestep: torch.LongTensor = None,
img_shapes: Optional[List[Tuple[int, int, int]]] = None,
txt_seq_lens: Optional[List[int]] = None,
joint_attention_kwargs: Optional[Dict[str, Any]] = None,
return_dict: bool = True,
) -> Union[QwenImageControlNetOutput, Tuple]:
# ControlNet-Union with multiple conditions
# only load one ControlNet to save memory
if len(self.nets) == 1:
controlnet = self.nets[0]
for i, (image, scale) in enumerate(zip(controlnet_cond, conditioning_scale)):
block_samples = controlnet(
hidden_states=hidden_states,
controlnet_cond=image,
conditioning_scale=scale,
encoder_hidden_states=encoder_hidden_states,
encoder_hidden_states_mask=encoder_hidden_states_mask,
timestep=timestep,
img_shapes=img_shapes,
txt_seq_lens=txt_seq_lens,
joint_attention_kwargs=joint_attention_kwargs,
return_dict=return_dict,
)
# merge samples
if i == 0:
control_block_samples = block_samples
else:
if block_samples is not None and control_block_samples is not None:
control_block_samples = [
control_block_sample + block_sample
for control_block_sample, block_sample in zip(control_block_samples, block_samples)
]
else:
raise ValueError("QwenImageMultiControlNetModel only supports a single controlnet-union now.")
return control_block_samples
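For reference, the `from_transformer` classmethod above builds a shallow ControlNet from a full Qwen-Image transformer: it copies the embedders and the first `num_layers` transformer blocks, while `controlnet_x_embedder` and the per-block output projections created in `__init__` stay zero-initialized, so the ControlNet initially contributes nothing. A minimal sketch of how it might be used when setting up a new ControlNet for training; the checkpoint ID and subfolder are assumptions.

```python
import torch
from diffusers import QwenImageControlNetModel, QwenImageTransformer2DModel

# Assumed checkpoint layout; adjust the repo id / subfolder to your setup.
transformer = QwenImageTransformer2DModel.from_pretrained(
    "Qwen/Qwen-Image", subfolder="transformer", torch_dtype=torch.bfloat16
)

# 5-block ControlNet seeded from the transformer weights; the zero-initialized
# projections mean the control branch starts out as a no-op.
controlnet = QwenImageControlNetModel.from_transformer(
    transformer,
    num_layers=5,
    attention_head_dim=128,
    num_attention_heads=24,
)
```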
...@@ -16,6 +16,7 @@ import functools
import math
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
...@@ -552,6 +553,7 @@ class QwenImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, Fro
txt_seq_lens: Optional[List[int]] = None,
guidance: torch.Tensor = None, # TODO: this should probably be removed
attention_kwargs: Optional[Dict[str, Any]] = None,
controlnet_block_samples=None,
return_dict: bool = True,
) -> Union[torch.Tensor, Transformer2DModelOutput]:
"""
...@@ -631,6 +633,12 @@ class QwenImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, Fro
joint_attention_kwargs=attention_kwargs,
)
# controlnet residual
if controlnet_block_samples is not None:
interval_control = len(self.transformer_blocks) / len(controlnet_block_samples)
interval_control = int(np.ceil(interval_control))
hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control]
# Use only the image part (hidden_states) from the dual-stream blocks
hidden_states = self.norm_out(hidden_states, temb)
output = self.proj_out(hidden_states)
...
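For reference, the residual hookup added above maps every transformer block to one of the (typically fewer) ControlNet outputs by integer division. A small standalone illustration of the indexing, assuming 60 transformer blocks and a ControlNet that returned 5 block samples:

```python
import numpy as np

num_transformer_blocks = 60  # assumed transformer depth
num_controlnet_samples = 5   # e.g. a ControlNet built with num_layers=5

interval_control = int(np.ceil(num_transformer_blocks / num_controlnet_samples))  # -> 12

# Blocks 0-11 reuse ControlNet sample 0, blocks 12-23 sample 1, ..., blocks 48-59 sample 4.
mapping = [index_block // interval_control for index_block in range(num_transformer_blocks)]
assert mapping[0] == 0 and mapping[11] == 0 and mapping[12] == 1 and mapping[59] == 4
```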
...@@ -209,7 +209,7 @@ class ComponentSpec:
# Get all loading fields in order
loading_fields = cls.loading_fields()
result = {f: None for f in loading_fields}  # before
result = dict.fromkeys(loading_fields)  # after
if load_id == "null":
return result
...
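The `dict.fromkeys` change above is behavior-preserving: called without a value argument, `dict.fromkeys` maps every key to `None`, exactly like the comprehension it replaces. A quick check with hypothetical field names:

```python
loading_fields = ["repo", "subfolder", "variant", "revision"]  # hypothetical names
assert {f: None for f in loading_fields} == dict.fromkeys(loading_fields)
```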
...@@ -393,6 +393,7 @@ else:
"QwenImageImg2ImgPipeline",
"QwenImageInpaintPipeline",
"QwenImageEditPipeline",
"QwenImageControlNetPipeline",
]
try:
if not is_onnx_available():
...@@ -712,6 +713,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
from .pia import PIAPipeline
from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
from .qwenimage import (
QwenImageControlNetPipeline,
QwenImageEditPipeline,
QwenImageImg2ImgPipeline,
QwenImageInpaintPipeline,
...
...@@ -24,6 +24,7 @@ except OptionalDependencyNotAvailable:
else:
_import_structure["modeling_qwenimage"] = ["ReduxImageEncoder"]
_import_structure["pipeline_qwenimage"] = ["QwenImagePipeline"]
_import_structure["pipeline_qwenimage_controlnet"] = ["QwenImageControlNetPipeline"]
_import_structure["pipeline_qwenimage_edit"] = ["QwenImageEditPipeline"] _import_structure["pipeline_qwenimage_edit"] = ["QwenImageEditPipeline"]
_import_structure["pipeline_qwenimage_img2img"] = ["QwenImageImg2ImgPipeline"] _import_structure["pipeline_qwenimage_img2img"] = ["QwenImageImg2ImgPipeline"]
_import_structure["pipeline_qwenimage_inpaint"] = ["QwenImageInpaintPipeline"] _import_structure["pipeline_qwenimage_inpaint"] = ["QwenImageInpaintPipeline"]
...@@ -36,6 +37,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: ...@@ -36,6 +37,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
from ...utils.dummy_torch_and_transformers_objects import * # noqa F403 from ...utils.dummy_torch_and_transformers_objects import * # noqa F403
else: else:
from .pipeline_qwenimage import QwenImagePipeline from .pipeline_qwenimage import QwenImagePipeline
from .pipeline_qwenimage_controlnet import QwenImageControlNetPipeline
from .pipeline_qwenimage_edit import QwenImageEditPipeline
from .pipeline_qwenimage_img2img import QwenImageImg2ImgPipeline
from .pipeline_qwenimage_inpaint import QwenImageInpaintPipeline
...
...@@ -1083,6 +1083,36 @@ class PriorTransformer(metaclass=DummyObject):
requires_backends(cls, ["torch"])
class QwenImageControlNetModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
class QwenImageMultiControlNetModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
class QwenImageTransformer2DModel(metaclass=DummyObject):
_backends = ["torch"]
...
...@@ -1757,6 +1757,21 @@ class PixArtSigmaPipeline(metaclass=DummyObject):
requires_backends(cls, ["torch", "transformers"])
class QwenImageControlNetPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch", "transformers"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
class QwenImageEditPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
...