modular_blocks.py 7.05 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ...utils import logging
from ..modular_pipeline import AutoPipelineBlocks, SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict
18
19
20
21
22
23
24
from .before_denoise import (
    FluxImg2ImgPrepareLatentsStep,
    FluxImg2ImgSetTimestepsStep,
    FluxInputStep,
    FluxPrepareLatentsStep,
    FluxSetTimestepsStep,
)
25
26
from .decoders import FluxDecodeStep
from .denoise import FluxDenoiseStep
27
from .encoders import FluxTextEncoderStep, FluxVaeEncoderStep
28
29
30
31
32


# Module-level logger, obtained from the package's own `logging` utility and keyed by this module's name.
logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# vae encoder (run before before_denoise)
class FluxAutoVaeEncoderStep(AutoPipelineBlocks):
    # One candidate block, labelled "img2img" and associated with the `image` input.
    # Per the description below, the block is used when `image` is supplied and the
    # whole step is skipped otherwise.
    block_classes = [FluxVaeEncoderStep]
    block_names = ["img2img"]
    block_trigger_inputs = ["image"]

    @property
    def description(self) -> str:
        """Return the human-readable summary of this auto block.

        Fixes relative to the previous text: the skip condition was inverted
        ("if `image` is provided" instead of "is not provided"), and the two
        bullet items were missing the newline that separates them.
        """
        return (
            "Vae encoder step that encode the image inputs into their latent representations.\n"
            + "This is an auto pipeline block that works for img2img tasks.\n"
            + " - `FluxVaeEncoderStep` (img2img) is used when only `image` is provided.\n"
            + " - if `image` is not provided, step will be skipped."
        )


# before_denoise: text2img, img2img
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class FluxBeforeDenoiseStep(SequentialPipelineBlocks):
    # Sub-blocks for the text2img path; `block_names` has one label per entry of
    # `block_classes`, in the same order (presumably paired by position).
    block_classes = [FluxInputStep, FluxPrepareLatentsStep, FluxSetTimestepsStep]
    block_names = ["input", "prepare_latents", "set_timesteps"]

    @property
    def description(self) -> str:
        """Return a text summary of this block and of each sub-block it lists."""
        summary_lines = (
            "Before denoise step that prepare the inputs for the denoise step.\n",
            "This is a sequential pipeline blocks:\n",
            " - `FluxInputStep` is used to adjust the batch size of the model inputs\n",
            " - `FluxPrepareLatentsStep` is used to prepare the latents\n",
            " - `FluxSetTimestepsStep` is used to set the timesteps\n",
        )
        return "".join(summary_lines)


69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# before_denoise: img2img
class FluxImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
    # Sub-blocks for the img2img path. Unlike the text2img variant in this file,
    # the timesteps block is listed before the latents block.
    block_classes = [
        FluxInputStep,
        FluxImg2ImgSetTimestepsStep,
        FluxImg2ImgPrepareLatentsStep,
    ]
    block_names = [
        "input",
        "set_timesteps",
        "prepare_latents",
    ]

    @property
    def description(self) -> str:
        """Return a text summary of this block and of each sub-block it lists."""
        summary_lines = (
            "Before denoise step that prepare the inputs for the denoise step for img2img task.\n",
            "This is a sequential pipeline blocks:\n",
            " - `FluxInputStep` is used to adjust the batch size of the model inputs\n",
            " - `FluxImg2ImgSetTimestepsStep` is used to set the timesteps\n",
            " - `FluxImg2ImgPrepareLatentsStep` is used to prepare the latents\n",
        )
        return "".join(summary_lines)


# before_denoise: all task (text2img, img2img)
86
class FluxAutoBeforeDenoiseStep(AutoPipelineBlocks):
    # Two candidate blocks. The img2img variant is associated with the
    # `image_latents` input; the text2image variant has a `None` trigger
    # (presumably the fallback when no trigger input is present - confirm
    # against the AutoPipelineBlocks implementation).
    block_classes = [FluxImg2ImgBeforeDenoiseStep, FluxBeforeDenoiseStep]
    block_names = ["img2img", "text2image"]
    block_trigger_inputs = ["image_latents", None]

    @property
    def description(self) -> str:
        """Return the human-readable summary of this auto block.

        The text now names both tasks this block lists (it previously claimed
        text2image only) and states when the text2image variant is chosen.
        """
        return (
            "Before denoise step that prepare the inputs for the denoise step.\n"
            + "This is an auto pipeline block that works for text2image and img2img tasks.\n"
            + " - `FluxBeforeDenoiseStep` (text2image) is used when `image_latents` is not provided.\n"
            + " - `FluxImg2ImgBeforeDenoiseStep` (img2img) is used when only `image_latents` is provided.\n"
        )


# denoise: all tasks (text2image, img2img)
class FluxAutoDenoiseStep(AutoPipelineBlocks):
    # Single candidate block with a `None` trigger; the description below states
    # it serves both text2image and img2img.
    block_classes = [FluxDenoiseStep]
    block_names = ["denoise"]
    block_trigger_inputs = [None]

    @property
    def description(self) -> str:
        """Return the human-readable summary of this auto block.

        Each sentence/bullet ends with a newline so the bullet is not glued to
        the preceding sentence, matching the layout used by the other blocks in
        this module; also fixes the "a auto" typo.
        """
        return (
            "Denoise step that iteratively denoise the latents.\n"
            + "This is an auto pipeline block that works for text2image and img2img tasks.\n"
            + " - `FluxDenoiseStep` (denoise) for text2image and img2img tasks."
        )


# decode: all task (text2img, img2img, inpainting)
class FluxAutoDecodeStep(AutoPipelineBlocks):
    # Single candidate block with a `None` trigger, labelled "non-inpaint".
    block_classes = [FluxDecodeStep]
    block_names = ["non-inpaint"]
    block_trigger_inputs = [None]

    @property
    def description(self) -> str:
        """Return the human-readable summary of this auto block."""
        summary = "Decode step that decode the denoised latents into image outputs.\n - `FluxDecodeStep`"
        return summary
125
126


127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class FluxCoreDenoiseStep(SequentialPipelineBlocks):
    # Sub-blocks making up the core denoising stage, in listed order.
    # NOTE(review): `FluxInputStep` is listed here and is also the first entry of
    # both `FluxBeforeDenoiseStep` and `FluxImg2ImgBeforeDenoiseStep`, which
    # `FluxAutoBeforeDenoiseStep` selects between - confirm the duplication is intended.
    block_classes = [
        FluxInputStep,
        FluxAutoBeforeDenoiseStep,
        FluxAutoDenoiseStep,
    ]
    block_names = [
        "input",
        "before_denoise",
        "denoise",
    ]

    @property
    def description(self) -> str:
        """Return a text summary of the sub-blocks and the inputs each task needs."""
        summary_lines = (
            "Core step that performs the denoising process. \n",
            " - `FluxInputStep` (input) standardizes the inputs for the denoising step.\n",
            " - `FluxAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n",
            " - `FluxAutoDenoiseStep` (denoise) iteratively denoises the latents.\n",
            "This step support text-to-image and image-to-image tasks for Flux:\n",
            " - for image-to-image generation, you need to provide `image_latents`\n",
            " - for text-to-image generation, all you need to provide is prompt embeddings",
        )
        return "".join(summary_lines)


144
145
# auto blocks: all tasks (text2image, img2img)
class FluxAutoBlocks(SequentialPipelineBlocks):
    # Top-level stages in listed order; `block_names` has one label per stage.
    block_classes = [FluxTextEncoderStep, FluxAutoVaeEncoderStep, FluxCoreDenoiseStep, FluxAutoDecodeStep]
    block_names = [
        "text_encoder",
        "image_encoder",
        "denoise",
        "decode",
    ]

    @property
    def description(self) -> str:
        """Return a text summary of the tasks covered and the input each one needs."""
        summary_lines = (
            "Auto Modular pipeline for text-to-image and image-to-image using Flux.\n",
            "- for text-to-image generation, all you need to provide is `prompt`\n",
            "- for image-to-image generation, you need to provide either `image` or `image_latents`",
        )
        return "".join(summary_lines)


# Flat (non-auto) block listing for text2image, as ordered (name, block class) pairs.
TEXT2IMAGE_BLOCKS = InsertableDict(
    [
        # conditioning
        ("text_encoder", FluxTextEncoderStep),
        # before-denoise preparation, same order as `FluxBeforeDenoiseStep`
        ("input", FluxInputStep),
        ("prepare_latents", FluxPrepareLatentsStep),
        ("set_timesteps", FluxSetTimestepsStep),
        # denoise and decode
        ("denoise", FluxDenoiseStep),
        ("decode", FluxDecodeStep),
    ]
)

174
175
176
177
178
179
180
181
182
183
184
# Flat (non-auto) block listing for img2img, as ordered (name, block class) pairs.
IMAGE2IMAGE_BLOCKS = InsertableDict(
    [
        # conditioning: text prompt and input image
        ("text_encoder", FluxTextEncoderStep),
        ("image_encoder", FluxVaeEncoderStep),
        # before-denoise preparation, same order as `FluxImg2ImgBeforeDenoiseStep`
        ("input", FluxInputStep),
        ("set_timesteps", FluxImg2ImgSetTimestepsStep),
        ("prepare_latents", FluxImg2ImgPrepareLatentsStep),
        # denoise and decode
        ("denoise", FluxDenoiseStep),
        ("decode", FluxDecodeStep),
    ]
)
185
186
187
188

# Auto block listing as ordered (name, block class) pairs; the names and classes
# match `FluxAutoBlocks.block_names` / `FluxAutoBlocks.block_classes`.
AUTO_BLOCKS = InsertableDict(
    [
        ("text_encoder", FluxTextEncoderStep),
        ("image_encoder", FluxAutoVaeEncoderStep),
        ("denoise", FluxCoreDenoiseStep),
        ("decode", FluxAutoDecodeStep),
    ]
)


196
# Registry mapping a task key to its block listing defined above.
ALL_BLOCKS = {
    "text2image": TEXT2IMAGE_BLOCKS,
    "img2img": IMAGE2IMAGE_BLOCKS,
    "auto": AUTO_BLOCKS,
}