Commit ee7e141d (unverified), authored by hlky, committed by GitHub
Browse files

Use pipelines without vae (#10441)



* Use pipelines without vae

* getattr

* vqvae

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent 01bd7964
...@@ -196,7 +196,7 @@ class StableDiffusionXLControlNetXSPipeline( ...@@ -196,7 +196,7 @@ class StableDiffusionXLControlNetXSPipeline(
scheduler=scheduler, scheduler=scheduler,
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor( self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
......
...@@ -284,7 +284,7 @@ class AltDiffusionPipeline( ...@@ -284,7 +284,7 @@ class AltDiffusionPipeline(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
image_encoder=image_encoder, image_encoder=image_encoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -312,7 +312,7 @@ class AltDiffusionImg2ImgPipeline( ...@@ -312,7 +312,7 @@ class AltDiffusionImg2ImgPipeline(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
image_encoder=image_encoder, image_encoder=image_encoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -243,7 +243,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Sta ...@@ -243,7 +243,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Sta
safety_checker=safety_checker, safety_checker=safety_checker,
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -213,7 +213,7 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -213,7 +213,7 @@ class StableDiffusionInpaintPipelineLegacy(
safety_checker=safety_checker, safety_checker=safety_checker,
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -121,7 +121,7 @@ class StableDiffusionModelEditingPipeline( ...@@ -121,7 +121,7 @@ class StableDiffusionModelEditingPipeline(
safety_checker=safety_checker, safety_checker=safety_checker,
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -143,7 +143,7 @@ class StableDiffusionParadigmsPipeline( ...@@ -143,7 +143,7 @@ class StableDiffusionParadigmsPipeline(
safety_checker=safety_checker, safety_checker=safety_checker,
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -365,7 +365,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -365,7 +365,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
caption_generator=caption_generator, caption_generator=caption_generator,
inverse_scheduler=inverse_scheduler, inverse_scheduler=inverse_scheduler,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -76,7 +76,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -76,7 +76,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
vae=vae, vae=vae,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
@torch.no_grad() @torch.no_grad()
def image_variation( def image_variation(
......
...@@ -94,7 +94,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline): ...@@ -94,7 +94,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
vae=vae, vae=vae,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
if self.text_unet is not None and ( if self.text_unet is not None and (
......
...@@ -77,7 +77,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline): ...@@ -77,7 +77,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
vae=vae, vae=vae,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
......
...@@ -82,7 +82,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline): ...@@ -82,7 +82,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
vae=vae, vae=vae,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
if self.text_unet is not None: if self.text_unet is not None:
......
...@@ -206,9 +206,7 @@ class FluxPipeline( ...@@ -206,9 +206,7 @@ class FluxPipeline(
image_encoder=image_encoder, image_encoder=image_encoder,
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
......
...@@ -212,12 +212,8 @@ class FluxControlPipeline( ...@@ -212,12 +212,8 @@ class FluxControlPipeline(
transformer=transformer, transformer=transformer,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8 self.vae_latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
)
self.vae_latent_channels = (
self.vae.config.latent_channels if hasattr(self, "vae") and self.vae is not None else 16
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor( self.image_processor = VaeImageProcessor(
......
...@@ -227,9 +227,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin ...@@ -227,9 +227,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
transformer=transformer, transformer=transformer,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
......
...@@ -258,15 +258,14 @@ class FluxControlInpaintPipeline( ...@@ -258,15 +258,14 @@ class FluxControlInpaintPipeline(
transformer=transformer, transformer=transformer,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
self.mask_processor = VaeImageProcessor( self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor * 2, vae_scale_factor=self.vae_scale_factor * 2,
vae_latent_channels=self.vae.config.latent_channels, vae_latent_channels=latent_channels,
do_normalize=False, do_normalize=False,
do_binarize=True, do_binarize=True,
do_convert_grayscale=True, do_convert_grayscale=True,
......
...@@ -229,9 +229,7 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF ...@@ -229,9 +229,7 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
scheduler=scheduler, scheduler=scheduler,
controlnet=controlnet, controlnet=controlnet,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
......
...@@ -227,9 +227,7 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From ...@@ -227,9 +227,7 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
scheduler=scheduler, scheduler=scheduler,
controlnet=controlnet, controlnet=controlnet,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
......
...@@ -230,15 +230,14 @@ class FluxControlNetInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From ...@@ -230,15 +230,14 @@ class FluxControlNetInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
controlnet=controlnet, controlnet=controlnet,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
self.mask_processor = VaeImageProcessor( self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor * 2, vae_scale_factor=self.vae_scale_factor * 2,
vae_latent_channels=self.vae.config.latent_channels, vae_latent_channels=latent_channels,
do_normalize=False, do_normalize=False,
do_binarize=True, do_binarize=True,
do_convert_grayscale=True, do_convert_grayscale=True,
......
...@@ -221,15 +221,14 @@ class FluxFillPipeline( ...@@ -221,15 +221,14 @@ class FluxFillPipeline(
transformer=transformer, transformer=transformer,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
self.mask_processor = VaeImageProcessor( self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor * 2, vae_scale_factor=self.vae_scale_factor * 2,
vae_latent_channels=self.vae.config.latent_channels, vae_latent_channels=latent_channels,
do_normalize=False, do_normalize=False,
do_binarize=True, do_binarize=True,
do_convert_grayscale=True, do_convert_grayscale=True,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment