Unverified commit 76d4e416 authored by YiYi Xu, committed by GitHub

[modular] some small fix (#12307)

* fix

* add mellon node registry

* style

* update docstring to include more info!

* support custom node mellon

* HTTPError -> HfHubHTTPError

* up

* Update src/diffusers/modular_pipelines/qwenimage/node_utils.py
parent c07fcf78
......@@ -32,6 +32,8 @@ class FluxModularPipeline(ModularPipeline, FluxLoraLoaderMixin, TextualInversion
</Tip>
"""
default_blocks_name = "FluxAutoBlocks"
@property
def default_height(self):
return self.default_sample_size * self.vae_scale_factor
......
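Editor's note: the `default_height` property above is just `default_sample_size * vae_scale_factor`. A quick numeric check, assuming Flux-like values (`default_sample_size = 128`, `vae_scale_factor = 8` — both are assumptions, not read from this diff):

```python
# Hedged numeric check for default_height, assuming Flux-like defaults.
default_sample_size = 128  # assumed value
vae_scale_factor = 8       # assumed value
assert default_sample_size * vae_scale_factor == 1024
```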
This diff is collapsed.
......@@ -51,6 +51,7 @@ if is_accelerate_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
# map regular pipeline to modular pipeline class name
MODULAR_PIPELINE_MAPPING = OrderedDict(
[
("stable-diffusion-xl", "StableDiffusionXLModularPipeline"),
......@@ -61,16 +62,6 @@ MODULAR_PIPELINE_MAPPING = OrderedDict(
]
)
-MODULAR_PIPELINE_BLOCKS_MAPPING = OrderedDict(
-    [
-        ("StableDiffusionXLModularPipeline", "StableDiffusionXLAutoBlocks"),
-        ("WanModularPipeline", "WanAutoBlocks"),
-        ("FluxModularPipeline", "FluxAutoBlocks"),
-        ("QwenImageModularPipeline", "QwenImageAutoBlocks"),
-        ("QwenImageEditModularPipeline", "QwenImageEditAutoBlocks"),
-    ]
-)
@dataclass
class PipelineState:
......@@ -423,7 +414,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
state.set(input_param.name, param, input_param.kwargs_type)
elif input_param.kwargs_type:
-                # if it is a kwargs type, e.g. "guider_input_fields", it is likely to be a list of parameters
+                # if it is a kwargs type, e.g. "denoiser_input_fields", it is likely to be a list of parameters
# we need to first find out which inputs are and loop through them.
intermediate_kwargs = state.get_by_kwargs(input_param.kwargs_type)
for param_name, current_value in intermediate_kwargs.items():
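Editor's note: the `kwargs_type` tag lets a block pull every field sharing that label in one call. A minimal sketch of the consuming side, assuming a populated `PipelineState` named `state` as in this file:

```python
# Sketch: fetch all fields tagged kwargs_type="denoiser_input_fields" at once.
denoiser_inputs = state.get_by_kwargs("denoiser_input_fields")
for param_name, current_value in denoiser_inputs.items():
    # e.g. prompt_embeds, negative_prompt_embeds, add_time_ids, ...
    print(param_name, type(current_value))
```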
......@@ -1454,6 +1445,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
config_name = "modular_model_index.json"
hf_device_map = None
+    default_blocks_name = None
# YiYi TODO: add warning for passing multiple ComponentSpec/ConfigSpec with the same name
def __init__(
......@@ -1514,7 +1506,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
`_blocks_class_name` in the config dict
"""
if blocks is None:
-            blocks_class_name = MODULAR_PIPELINE_BLOCKS_MAPPING.get(self.__class__.__name__)
+            blocks_class_name = self.default_blocks_name
if blocks_class_name is not None:
diffusers_module = importlib.import_module("diffusers")
blocks_class = getattr(diffusers_module, blocks_class_name)
......
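Editor's note: with `MODULAR_PIPELINE_BLOCKS_MAPPING` removed, each pipeline class now carries its own `default_blocks_name`, which the loader resolves lazily from the `diffusers` namespace. A minimal sketch of that resolution path (class names from this diff; the rest of the `ModularPipeline` machinery is assumed):

```python
import importlib

class PipelineLike:
    # Each ModularPipeline subclass sets this, e.g. "FluxAutoBlocks".
    default_blocks_name = None

    def resolve_default_blocks(self):
        # Mirrors the lookup in the hunk above: name -> class on the diffusers module.
        if self.default_blocks_name is None:
            return None
        diffusers_module = importlib.import_module("diffusers")
        return getattr(diffusers_module, self.default_blocks_name)
```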
This diff is collapsed.
......@@ -577,9 +577,8 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(name="batch_size", required=True),
-            InputParam(
-                name="resized_image", required=True, type_hint=torch.Tensor, description="The resized image input"
-            ),
+            InputParam(name="image_height", required=True),
+            InputParam(name="image_width", required=True),
InputParam(name="height", required=True),
InputParam(name="width", required=True),
InputParam(name="prompt_embeds_mask"),
......@@ -612,10 +611,6 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
block_state = self.get_block_state(state)
# for edit, image size can be different from the target size (height/width)
-        image = (
-            block_state.resized_image[0] if isinstance(block_state.resized_image, list) else block_state.resized_image
-        )
-        image_width, image_height = image.size
block_state.img_shapes = [
[
......@@ -624,7 +619,11 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
block_state.height // components.vae_scale_factor // 2,
block_state.width // components.vae_scale_factor // 2,
),
-                (1, image_height // components.vae_scale_factor // 2, image_width // components.vae_scale_factor // 2),
+                (
+                    1,
+                    block_state.image_height // components.vae_scale_factor // 2,
+                    block_state.image_width // components.vae_scale_factor // 2,
+                ),
]
] * block_state.batch_size
......
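Editor's note: the RoPE step now builds `img_shapes` from the `image_height`/`image_width` intermediates instead of re-measuring `resized_image`. A worked example of the shape arithmetic, with illustrative numbers (`vae_scale_factor = 8` is an assumption):

```python
# Worked example of the img_shapes computation above (values illustrative).
vae_scale_factor = 8
batch_size = 2
height, width = 1024, 1024               # target size
image_height, image_width = 1328, 1328   # size of the resized source image

img_shapes = [
    [
        (1, height // vae_scale_factor // 2, width // vae_scale_factor // 2),
        (1, image_height // vae_scale_factor // 2, image_width // vae_scale_factor // 2),
    ]
] * batch_size
assert img_shapes[0] == [(1, 64, 64), (1, 83, 83)]
```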
......@@ -496,7 +496,7 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
)
if components.requires_unconditional_embeds:
-            negative_prompt = block_state.negative_prompt or ""
+            negative_prompt = block_state.negative_prompt or " "
block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit(
components.text_encoder,
components.processor,
......
......@@ -307,6 +307,13 @@ class QwenImageInputsDynamicStep(ModularPipelineBlocks):
return inputs
+    @property
+    def intermediate_outputs(self) -> List[OutputParam]:
+        return [
+            OutputParam(name="image_height", type_hint=int, description="The height of the image latents"),
+            OutputParam(name="image_width", type_hint=int, description="The width of the image latents"),
+        ]
@property
def expected_components(self) -> List[ComponentSpec]:
return [
......@@ -327,6 +334,11 @@ class QwenImageInputsDynamicStep(ModularPipelineBlocks):
block_state.height = block_state.height or height
block_state.width = block_state.width or width
+        if not hasattr(block_state, "image_height"):
+            block_state.image_height = height
+        if not hasattr(block_state, "image_width"):
+            block_state.image_width = width
# 2. Patchify the image latent tensor
image_latent_tensor = components.pachifier.pack_latents(image_latent_tensor)
......
......@@ -511,17 +511,42 @@ class QwenImageAutoDecodeStep(AutoPipelineBlocks):
)
+class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
+    model_name = "qwenimage"
+    block_classes = [
+        QwenImageAutoInputStep,
+        QwenImageOptionalControlNetInputStep,
+        QwenImageAutoBeforeDenoiseStep,
+        QwenImageOptionalControlNetBeforeDenoiseStep,
+        QwenImageAutoDenoiseStep,
+    ]
+    block_names = ["input", "controlnet_input", "before_denoise", "controlnet_before_denoise", "denoise"]
+
+    @property
+    def description(self):
+        return (
+            "Core step that performs the denoising process. \n"
+            + " - `QwenImageAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
+            + " - `QwenImageOptionalControlNetInputStep` (controlnet_input) prepares the controlnet input.\n"
+            + " - `QwenImageAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
+            + " - `QwenImageOptionalControlNetBeforeDenoiseStep` (controlnet_before_denoise) prepares the controlnet input for the denoising step.\n"
+            + " - `QwenImageAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
+            + "This step supports text-to-image, image-to-image, inpainting, and controlnet tasks for QwenImage:\n"
+            + " - for image-to-image generation, you need to provide `image_latents`\n"
+            + " - for inpainting, you need to provide `processed_mask_image` and `image_latents`\n"
+            + " - to run the controlnet workflow, you need to provide `control_image_latents`\n"
+            + " - for text-to-image generation, all you need to provide is prompt embeddings"
+        )
## 1.10 QwenImage/auto block & presets
AUTO_BLOCKS = InsertableDict(
[
("text_encoder", QwenImageTextEncoderStep()),
("vae_encoder", QwenImageAutoVaeEncoderStep()),
("controlnet_vae_encoder", QwenImageOptionalControlNetVaeEncoderStep()),
("input", QwenImageAutoInputStep()),
("controlnet_input", QwenImageOptionalControlNetInputStep()),
("before_denoise", QwenImageAutoBeforeDenoiseStep()),
("controlnet_before_denoise", QwenImageOptionalControlNetBeforeDenoiseStep()),
("denoise", QwenImageAutoDenoiseStep()),
("denoise", QwenImageCoreDenoiseStep()),
("decode", QwenImageAutoDecodeStep()),
]
)
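Editor's note: as a usage sketch, this preset can be assembled into runnable blocks the same way other modular presets are (`from_blocks_dict`, `sub_blocks`, and `init_pipeline` follow the modular-diffusers API; the repo id is illustrative):

```python
# Hedged usage sketch for the AUTO_BLOCKS preset defined above.
from diffusers.modular_pipelines import SequentialPipelineBlocks

blocks = SequentialPipelineBlocks.from_blocks_dict(AUTO_BLOCKS)
print(list(blocks.sub_blocks.keys()))  # text_encoder, vae_encoder, controlnet_vae_encoder, denoise, decode
pipeline = blocks.init_pipeline("Qwen/Qwen-Image")  # repo id is illustrative
```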
......@@ -699,7 +724,7 @@ class QwenImageEditAutoVaeEncoderStep(AutoPipelineBlocks):
class QwenImageEditAutoInputStep(AutoPipelineBlocks):
block_classes = [QwenImageInpaintInputStep, QwenImageEditInputStep]
block_names = ["edit_inpaint", "edit"]
-    block_trigger_inputs = ["processed_mask_image", "image"]
+    block_trigger_inputs = ["processed_mask_image", "image_latents"]
@property
def description(self):
......@@ -800,13 +825,34 @@ class QwenImageEditAutoDenoiseStep(AutoPipelineBlocks):
## 2.7 QwenImage-Edit/auto blocks & presets
+class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
+    model_name = "qwenimage-edit"
+    block_classes = [
+        QwenImageEditAutoInputStep,
+        QwenImageEditAutoBeforeDenoiseStep,
+        QwenImageEditAutoDenoiseStep,
+    ]
+    block_names = ["input", "before_denoise", "denoise"]
+
+    @property
+    def description(self):
+        return (
+            "Core step that performs the denoising process. \n"
+            + " - `QwenImageEditAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
+            + " - `QwenImageEditAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
+            + " - `QwenImageEditAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
+            + "This step supports the edit (img2img) and edit inpainting workflows for QwenImage Edit:\n"
+            + " - When `processed_mask_image` is provided, it will be used for the edit inpainting task.\n"
+            + " - When `image_latents` is provided, it will be used for the edit (img2img) task.\n"
+        )
EDIT_AUTO_BLOCKS = InsertableDict(
[
("text_encoder", QwenImageEditVLEncoderStep()),
("vae_encoder", QwenImageEditAutoVaeEncoderStep()),
("input", QwenImageEditAutoInputStep()),
("before_denoise", QwenImageEditAutoBeforeDenoiseStep()),
("denoise", QwenImageEditAutoDenoiseStep()),
("denoise", QwenImageEditCoreDenoiseStep()),
("decode", QwenImageAutoDecodeStep()),
]
)
......
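Editor's note: note the trigger change above — `QwenImageEditAutoInputStep` now dispatches on `image_latents` rather than the raw `image`. A small sketch of how `AutoPipelineBlocks` trigger resolution behaves (first matching trigger wins; the helper below is hypothetical, written only to illustrate the rule):

```python
# Hypothetical helper illustrating AutoPipelineBlocks trigger resolution.
from typing import Any, Dict, Optional

def select_sub_block(state: Dict[str, Any]) -> Optional[str]:
    triggers = ["processed_mask_image", "image_latents"]  # from this diff
    names = ["edit_inpaint", "edit"]
    for trigger, name in zip(triggers, names):
        if state.get(trigger) is not None:
            return name
    return None

assert select_sub_block({"image_latents": object()}) == "edit"
assert select_sub_block({"processed_mask_image": object(), "image_latents": object()}) == "edit_inpaint"
```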
......@@ -104,6 +104,8 @@ class QwenImageModularPipeline(ModularPipeline, QwenImageLoraLoaderMixin):
</Tip>
"""
default_blocks_name = "QwenImageAutoBlocks"
@property
def default_height(self):
return self.default_sample_size * self.vae_scale_factor
......@@ -158,6 +160,8 @@ class QwenImageEditModularPipeline(ModularPipeline, QwenImageLoraLoaderMixin):
</Tip>
"""
default_blocks_name = "QwenImageEditAutoBlocks"
# YiYi TODO: qwen edit should not provide default height/width, should be derived from the resized input image (after adjustment) produced by the resize step.
@property
def default_height(self):
......
# Copyright 2025 Qwen-Image Team and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# mellon nodes
QwenImage_NODE_TYPES_PARAMS_MAP = {
"controlnet": {
"inputs": [
"control_image",
"controlnet_conditioning_scale",
"control_guidance_start",
"control_guidance_end",
"height",
"width",
],
"model_inputs": [
"controlnet",
"vae",
],
"outputs": [
"controlnet_out",
],
"block_names": ["controlnet_vae_encoder"],
},
"denoise": {
"inputs": [
"embeddings",
"width",
"height",
"seed",
"num_inference_steps",
"guidance_scale",
"image_latents",
"strength",
"controlnet",
],
"model_inputs": [
"unet",
"guider",
"scheduler",
],
"outputs": [
"latents",
"latents_preview",
],
"block_names": ["denoise"],
},
"vae_encoder": {
"inputs": [
"image",
"width",
"height",
],
"model_inputs": [
"vae",
],
"outputs": [
"image_latents",
],
},
"text_encoder": {
"inputs": [
"prompt",
"negative_prompt",
],
"model_inputs": [
"text_encoders",
],
"outputs": [
"embeddings",
],
},
"decoder": {
"inputs": [
"latents",
],
"model_inputs": [
"vae",
],
"outputs": [
"images",
],
},
}
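Editor's note: this registry is plain data, so a Mellon front end can look up a node's wiring directly. For example (keys as defined above):

```python
# Reading the Mellon node registry defined above.
denoise_spec = QwenImage_NODE_TYPES_PARAMS_MAP["denoise"]
print(denoise_spec["inputs"])        # embeddings, width, height, seed, ...
print(denoise_spec["model_inputs"])  # unet, guider, scheduler
print(denoise_spec["block_names"])   # ["denoise"]
```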
......@@ -262,37 +262,37 @@ class StableDiffusionXLInputStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative text embeddings used to guide the image generation",
),
OutputParam(
"pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="pooled text embeddings used to guide the image generation",
),
OutputParam(
"negative_pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative pooled text embeddings used to guide the image generation",
),
OutputParam(
"ip_adapter_embeds",
type_hint=List[torch.Tensor],
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="image embeddings for IP-Adapter",
),
OutputParam(
"negative_ip_adapter_embeds",
type_hint=List[torch.Tensor],
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative image embeddings for IP-Adapter",
),
]
......@@ -1120,13 +1120,13 @@ class StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep(ModularPipelineB
OutputParam(
"add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The time ids to condition the denoising process",
),
OutputParam(
"negative_add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The negative time ids to condition the denoising process",
),
OutputParam("timestep_cond", type_hint=torch.Tensor, description="The timestep cond to use for LCM"),
......@@ -1331,13 +1331,13 @@ class StableDiffusionXLPrepareAdditionalConditioningStep(ModularPipelineBlocks):
OutputParam(
"add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The time ids to condition the denoising process",
),
OutputParam(
"negative_add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The negative time ids to condition the denoising process",
),
OutputParam("timestep_cond", type_hint=torch.Tensor, description="The timestep cond to use for LCM"),
......
......@@ -183,14 +183,14 @@ class StableDiffusionXLLoopDenoiser(ModularPipelineBlocks):
description="The guidance scale embedding to use for Latent Consistency Models(LCMs). Can be generated in prepare_additional_conditioning step.",
),
InputParam(
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description=(
"All conditional model inputs that need to be prepared with guider. "
"It should contain prompt_embeds/negative_prompt_embeds, "
"add_time_ids/negative_add_time_ids, "
"pooled_prompt_embeds/negative_pooled_prompt_embeds, "
"and ip_adapter_embeds/negative_ip_adapter_embeds (optional)."
"please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
"please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
),
),
]
......@@ -307,14 +307,14 @@ class StableDiffusionXLControlNetLoopDenoiser(ModularPipelineBlocks):
description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
),
InputParam(
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description=(
"All conditional model inputs that need to be prepared with guider. "
"It should contain prompt_embeds/negative_prompt_embeds, "
"add_time_ids/negative_add_time_ids, "
"pooled_prompt_embeds/negative_pooled_prompt_embeds, "
"and ip_adapter_embeds/negative_ip_adapter_embeds (optional)."
"please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
"please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
),
),
InputParam(
......
......@@ -258,25 +258,25 @@ class StableDiffusionXLTextEncoderStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="negative text embeddings used to guide the image generation",
),
OutputParam(
"pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="pooled text embeddings used to guide the image generation",
),
OutputParam(
"negative_pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="negative pooled text embeddings used to guide the image generation",
),
]
......
......@@ -82,19 +82,17 @@ class StableDiffusionXLAutoIPAdapterStep(AutoPipelineBlocks):
# before_denoise: text2img
class StableDiffusionXLBeforeDenoiseStep(SequentialPipelineBlocks):
block_classes = [
-        StableDiffusionXLInputStep,
StableDiffusionXLSetTimestepsStep,
StableDiffusionXLPrepareLatentsStep,
StableDiffusionXLPrepareAdditionalConditioningStep,
]
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]
@property
def description(self):
return (
"Before denoise step that prepare the inputs for the denoise step.\n"
+ "This is a sequential pipeline blocks:\n"
+ " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
+ " - `StableDiffusionXLSetTimestepsStep` is used to set the timesteps\n"
+ " - `StableDiffusionXLPrepareLatentsStep` is used to prepare the latents\n"
+ " - `StableDiffusionXLPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
......@@ -104,19 +102,17 @@ class StableDiffusionXLBeforeDenoiseStep(SequentialPipelineBlocks):
# before_denoise: img2img
class StableDiffusionXLImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
block_classes = [
-        StableDiffusionXLInputStep,
StableDiffusionXLImg2ImgSetTimestepsStep,
StableDiffusionXLImg2ImgPrepareLatentsStep,
StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep,
]
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]
@property
def description(self):
return (
"Before denoise step that prepare the inputs for the denoise step for img2img task.\n"
+ "This is a sequential pipeline blocks:\n"
+ " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
+ " - `StableDiffusionXLImg2ImgSetTimestepsStep` is used to set the timesteps\n"
+ " - `StableDiffusionXLImg2ImgPrepareLatentsStep` is used to prepare the latents\n"
+ " - `StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
......@@ -126,19 +122,17 @@ class StableDiffusionXLImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
# before_denoise: inpainting
class StableDiffusionXLInpaintBeforeDenoiseStep(SequentialPipelineBlocks):
block_classes = [
-        StableDiffusionXLInputStep,
StableDiffusionXLImg2ImgSetTimestepsStep,
StableDiffusionXLInpaintPrepareLatentsStep,
StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep,
]
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]
@property
def description(self):
return (
"Before denoise step that prepare the inputs for the denoise step for inpainting task.\n"
+ "This is a sequential pipeline blocks:\n"
+ " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
+ " - `StableDiffusionXLImg2ImgSetTimestepsStep` is used to set the timesteps\n"
+ " - `StableDiffusionXLInpaintPrepareLatentsStep` is used to prepare the latents\n"
+ " - `StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
......@@ -255,25 +249,48 @@ class StableDiffusionXLAutoDecodeStep(AutoPipelineBlocks):
)
+class StableDiffusionXLCoreDenoiseStep(SequentialPipelineBlocks):
+    block_classes = [
+        StableDiffusionXLInputStep,
+        StableDiffusionXLAutoBeforeDenoiseStep,
+        StableDiffusionXLAutoControlNetInputStep,
+        StableDiffusionXLAutoDenoiseStep,
+    ]
+    block_names = ["input", "before_denoise", "controlnet_input", "denoise"]
+
+    @property
+    def description(self):
+        return (
+            "Core step that performs the denoising process. \n"
+            + " - `StableDiffusionXLInputStep` (input) standardizes the inputs for the denoising step.\n"
+            + " - `StableDiffusionXLAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
+            + " - `StableDiffusionXLAutoControlNetInputStep` (controlnet_input) prepares the controlnet input.\n"
+            + " - `StableDiffusionXLAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
+            + "This step supports text-to-image, image-to-image, and inpainting, with or without controlnet/controlnet_union/ip_adapter, for Stable Diffusion XL:\n"
+            + "- for image-to-image generation, you need to provide `image_latents`\n"
+            + "- for inpainting, you need to provide `mask_image` and `image_latents`\n"
+            + "- to run the controlnet workflow, you need to provide `control_image`\n"
+            + "- to run the controlnet_union workflow, you need to provide `control_image` and `control_mode`\n"
+            + "- to run the ip_adapter workflow, you need to load ip_adapter into your unet and provide `ip_adapter_embeds`\n"
+            + "- for text-to-image generation, all you need to provide is prompt embeddings\n"
+        )
# ip-adapter, controlnet, text2img, img2img, inpainting
class StableDiffusionXLAutoBlocks(SequentialPipelineBlocks):
block_classes = [
StableDiffusionXLTextEncoderStep,
StableDiffusionXLAutoIPAdapterStep,
StableDiffusionXLAutoVaeEncoderStep,
-        StableDiffusionXLAutoBeforeDenoiseStep,
-        StableDiffusionXLAutoControlNetInputStep,
-        StableDiffusionXLAutoDenoiseStep,
+        StableDiffusionXLCoreDenoiseStep,
StableDiffusionXLAutoDecodeStep,
]
block_names = [
"text_encoder",
"ip_adapter",
"image_encoder",
"before_denoise",
"controlnet_input",
"vae_encoder",
"denoise",
"decoder",
"decode",
]
@property
......@@ -321,7 +338,7 @@ TEXT2IMAGE_BLOCKS = InsertableDict(
IMAGE2IMAGE_BLOCKS = InsertableDict(
[
("text_encoder", StableDiffusionXLTextEncoderStep),
("image_encoder", StableDiffusionXLVaeEncoderStep),
("vae_encoder", StableDiffusionXLVaeEncoderStep),
("input", StableDiffusionXLInputStep),
("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep),
......@@ -334,7 +351,7 @@ IMAGE2IMAGE_BLOCKS = InsertableDict(
INPAINT_BLOCKS = InsertableDict(
[
("text_encoder", StableDiffusionXLTextEncoderStep),
("image_encoder", StableDiffusionXLInpaintVaeEncoderStep),
("vae_encoder", StableDiffusionXLInpaintVaeEncoderStep),
("input", StableDiffusionXLInputStep),
("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
("prepare_latents", StableDiffusionXLInpaintPrepareLatentsStep),
......@@ -361,10 +378,8 @@ AUTO_BLOCKS = InsertableDict(
[
("text_encoder", StableDiffusionXLTextEncoderStep),
("ip_adapter", StableDiffusionXLAutoIPAdapterStep),
("image_encoder", StableDiffusionXLAutoVaeEncoderStep),
("before_denoise", StableDiffusionXLAutoBeforeDenoiseStep),
("controlnet_input", StableDiffusionXLAutoControlNetInputStep),
("denoise", StableDiffusionXLAutoDenoiseStep),
("vae_encoder", StableDiffusionXLAutoVaeEncoderStep),
("denoise", StableDiffusionXLCoreDenoiseStep),
("decode", StableDiffusionXLAutoDecodeStep),
]
)
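Editor's note: the slimmed-down preset keeps one entry per user-facing stage. A hedged end-to-end sketch — `from_blocks_dict`, `init_pipeline`, and `load_components` follow the modular-diffusers docs, but treat the exact calls and the repo id as assumptions:

```python
# Hedged end-to-end sketch for the SDXL auto preset defined above.
import torch
from diffusers.modular_pipelines import SequentialPipelineBlocks

blocks = SequentialPipelineBlocks.from_blocks_dict(AUTO_BLOCKS)
pipe = blocks.init_pipeline("stabilityai/stable-diffusion-xl-base-1.0")  # repo id illustrative
pipe.load_components(torch_dtype=torch.float16)  # assumed loader API
image = pipe(prompt="a photo of a cat", output="images")[0]
```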
......
......@@ -54,6 +54,8 @@ class StableDiffusionXLModularPipeline(
</Tip>
"""
default_blocks_name = "StableDiffusionXLAutoBlocks"
@property
def default_height(self):
return self.default_sample_size * self.vae_scale_factor
......
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SDXL_NODE_TYPES_PARAMS_MAP = {
"controlnet": {
"inputs": [
"control_image",
"controlnet_conditioning_scale",
"control_guidance_start",
"control_guidance_end",
"height",
"width",
],
"model_inputs": [
"controlnet",
],
"outputs": [
"controlnet_out",
],
"block_names": [None],
},
"denoise": {
"inputs": [
"embeddings",
"width",
"height",
"seed",
"num_inference_steps",
"guidance_scale",
"image_latents",
"strength",
# custom adapters coming in as inputs
"controlnet",
# ip_adapter is optional and custom; include if available
"ip_adapter",
],
"model_inputs": [
"unet",
"guider",
"scheduler",
],
"outputs": [
"latents",
"latents_preview",
],
"block_names": ["denoise"],
},
"vae_encoder": {
"inputs": [
"image",
"width",
"height",
],
"model_inputs": [
"vae",
],
"outputs": [
"image_latents",
],
"block_names": ["vae_encoder"],
},
"text_encoder": {
"inputs": [
"prompt",
"negative_prompt",
],
"model_inputs": [
"text_encoders",
],
"outputs": [
"embeddings",
],
"block_names": ["text_encoder"],
},
"decoder": {
"inputs": [
"latents",
],
"model_inputs": [
"vae",
],
"outputs": [
"images",
],
"block_names": ["decode"],
},
}
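Editor's note: since `block_names` here must point at entries in the SDXL preset earlier in this diff, a cheap consistency check is possible (a sketch, assuming `SDXL_NODE_TYPES_PARAMS_MAP` and the SDXL `AUTO_BLOCKS` are importable together):

```python
# Sketch: verify every node's block_names resolves to a preset entry.
for node_type, spec in SDXL_NODE_TYPES_PARAMS_MAP.items():
    for block_name in spec.get("block_names", []):
        # "controlnet" uses [None] above, so allow None explicitly.
        assert block_name is None or block_name in AUTO_BLOCKS, (
            f"{node_type}: unknown block {block_name!r}"
        )
```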
......@@ -146,13 +146,13 @@ class WanInputStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative text embeddings used to guide the image generation",
),
]
......
......@@ -79,11 +79,11 @@ class WanLoopDenoiser(ModularPipelineBlocks):
description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
),
InputParam(
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description=(
"All conditional model inputs that need to be prepared with guider. "
"It should contain prompt_embeds/negative_prompt_embeds. "
"Please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
"Please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
),
),
]
......
......@@ -89,13 +89,13 @@ class WanTextEncoderStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="negative text embeddings used to guide the image generation",
),
]
......
......@@ -37,6 +37,8 @@ class WanModularPipeline(
</Tip>
"""
default_blocks_name = "WanAutoBlocks"
@property
def default_height(self):
return self.default_sample_height * self.vae_scale_factor_spatial
......