Commit ee7e141d (unverified), authored by hlky, committed by GitHub
Browse files

Use pipelines without vae (#10441)



* Use pipelines without vae

* getattr

* vqvae

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent 01bd7964
...@@ -211,9 +211,7 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile ...@@ -211,9 +211,7 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
transformer=transformer, transformer=transformer,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
......
...@@ -208,15 +208,14 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin): ...@@ -208,15 +208,14 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
transformer=transformer, transformer=transformer,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
self.mask_processor = VaeImageProcessor( self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor * 2, vae_scale_factor=self.vae_scale_factor * 2,
vae_latent_channels=self.vae.config.latent_channels, vae_latent_channels=latent_channels,
do_normalize=False, do_normalize=False,
do_binarize=True, do_binarize=True,
do_convert_grayscale=True, do_convert_grayscale=True,
......
...@@ -184,12 +184,8 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin): ...@@ -184,12 +184,8 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
tokenizer_2=tokenizer_2, tokenizer_2=tokenizer_2,
) )
self.vae_scale_factor_temporal = ( self.vae_scale_factor_temporal = self.vae.temporal_compression_ratio if getattr(self, "vae", None) else 4
self.vae.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4 self.vae_scale_factor_spatial = self.vae.spatial_compression_ratio if getattr(self, "vae", None) else 8
)
self.vae_scale_factor_spatial = (
self.vae.spatial_compression_ratio if hasattr(self, "vae") and self.vae is not None else 8
)
self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial) self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
def _get_llama_prompt_embeds( def _get_llama_prompt_embeds(
......
...@@ -240,9 +240,7 @@ class HunyuanDiTPipeline(DiffusionPipeline): ...@@ -240,9 +240,7 @@ class HunyuanDiTPipeline(DiffusionPipeline):
" checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead." " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
self.default_sample_size = ( self.default_sample_size = (
......
...@@ -133,7 +133,7 @@ class I2VGenXLPipeline( ...@@ -133,7 +133,7 @@ class I2VGenXLPipeline(
unet=unet, unet=unet,
scheduler=scheduler, scheduler=scheduler,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
# `do_resize=False` as we do custom resizing. # `do_resize=False` as we do custom resizing.
self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor, do_resize=False) self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor, do_resize=False)
......
...@@ -188,9 +188,7 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionXLL ...@@ -188,9 +188,7 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionXLL
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.default_sample_size = self.unet.config.sample_size self.default_sample_size = self.unet.config.sample_size
......
...@@ -207,9 +207,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu ...@@ -207,9 +207,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.default_sample_size = self.unet.config.sample_size self.default_sample_size = self.unet.config.sample_size
......
...@@ -226,7 +226,7 @@ class LatentConsistencyModelImg2ImgPipeline( ...@@ -226,7 +226,7 @@ class LatentConsistencyModelImg2ImgPipeline(
" information, please have a look at https://github.com/huggingface/diffusers/pull/254 ." " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt
......
...@@ -209,7 +209,7 @@ class LatentConsistencyModelPipeline( ...@@ -209,7 +209,7 @@ class LatentConsistencyModelPipeline(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
image_encoder=image_encoder, image_encoder=image_encoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -180,7 +180,7 @@ class LattePipeline(DiffusionPipeline): ...@@ -180,7 +180,7 @@ class LattePipeline(DiffusionPipeline):
tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor) self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor)
# Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/utils.py # Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/utils.py
......
...@@ -389,7 +389,7 @@ class LEditsPPPipelineStableDiffusion( ...@@ -389,7 +389,7 @@ class LEditsPPPipelineStableDiffusion(
safety_checker=safety_checker, safety_checker=safety_checker,
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
......
...@@ -372,7 +372,7 @@ class LEditsPPPipelineStableDiffusionXL( ...@@ -372,7 +372,7 @@ class LEditsPPPipelineStableDiffusionXL(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
) )
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
if not isinstance(scheduler, DDIMScheduler) and not isinstance(scheduler, DPMSolverMultistepScheduler): if not isinstance(scheduler, DDIMScheduler) and not isinstance(scheduler, DPMSolverMultistepScheduler):
......
...@@ -174,7 +174,7 @@ class MarigoldDepthPipeline(DiffusionPipeline): ...@@ -174,7 +174,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
default_processing_resolution=default_processing_resolution, default_processing_resolution=default_processing_resolution,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.scale_invariant = scale_invariant self.scale_invariant = scale_invariant
self.shift_invariant = shift_invariant self.shift_invariant = shift_invariant
......
...@@ -161,7 +161,7 @@ class MarigoldNormalsPipeline(DiffusionPipeline): ...@@ -161,7 +161,7 @@ class MarigoldNormalsPipeline(DiffusionPipeline):
default_processing_resolution=default_processing_resolution, default_processing_resolution=default_processing_resolution,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.use_full_z_range = use_full_z_range self.use_full_z_range = use_full_z_range
self.default_denoising_steps = default_denoising_steps self.default_denoising_steps = default_denoising_steps
......
...@@ -111,7 +111,7 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin): ...@@ -111,7 +111,7 @@ class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
scheduler=scheduler, scheduler=scheduler,
vocoder=vocoder, vocoder=vocoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
def _encode_prompt( def _encode_prompt(
self, self,
......
...@@ -251,7 +251,7 @@ class StableDiffusionControlNetPAGPipeline( ...@@ -251,7 +251,7 @@ class StableDiffusionControlNetPAGPipeline(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
image_encoder=image_encoder, image_encoder=image_encoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor( self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
......
...@@ -228,7 +228,7 @@ class StableDiffusionControlNetPAGInpaintPipeline( ...@@ -228,7 +228,7 @@ class StableDiffusionControlNetPAGInpaintPipeline(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
image_encoder=image_encoder, image_encoder=image_encoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.mask_processor = VaeImageProcessor( self.mask_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
......
...@@ -280,7 +280,7 @@ class StableDiffusionXLControlNetPAGPipeline( ...@@ -280,7 +280,7 @@ class StableDiffusionXLControlNetPAGPipeline(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
image_encoder=image_encoder, image_encoder=image_encoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor( self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
......
...@@ -270,7 +270,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline( ...@@ -270,7 +270,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
feature_extractor=feature_extractor, feature_extractor=feature_extractor,
image_encoder=image_encoder, image_encoder=image_encoder,
) )
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
self.control_image_processor = VaeImageProcessor( self.control_image_processor = VaeImageProcessor(
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
......
...@@ -245,9 +245,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin): ...@@ -245,9 +245,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
" checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead." " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
) )
self.vae_scale_factor = ( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.register_to_config(requires_safety_checker=requires_safety_checker) self.register_to_config(requires_safety_checker=requires_safety_checker)
self.default_sample_size = ( self.default_sample_size = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment