Unverified Commit e649678b authored by C, committed by GitHub

[Flux] Optimize guidance creation in flux pipeline by moving it outside the loop (#9153)



* optimize guidance creation in flux pipeline by moving it outside the loop

* use torch.full instead of torch.tensor to create a tensor with a single value

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent 39b87b14
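
For context, a minimal standalone sketch of the change (variable names and values here are illustrative, not the pipeline's actual locals): the guidance tensor does not depend on the loop variable, so it can be created once before the denoising loop, and `torch.full` fills a single-element tensor with the scalar directly, with an explicit dtype.

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
guidance_scale = 3.5   # illustrative value
batch_size = 4         # stands in for latents.shape[0]
num_inference_steps = 28

# Before: the tensor was rebuilt on every denoising step.
for _ in range(num_inference_steps):
    guidance = torch.tensor([guidance_scale], device=device)
    guidance = guidance.expand(batch_size)

# After: built once before the loop. torch.full fills a single-element
# tensor with the scalar (here pinned to float32), and expand() returns
# a broadcast view across the batch without copying data.
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
guidance = guidance.expand(batch_size)
```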
@@ -677,6 +677,13 @@ class FluxPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
         self._num_timesteps = len(timesteps)
 
+        # handle guidance
+        if self.transformer.config.guidance_embeds:
+            guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
+            guidance = guidance.expand(latents.shape[0])
+        else:
+            guidance = None
+
         # 6. Denoising loop
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
@@ -686,13 +693,6 @@ class FluxPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)
 
-                # handle guidance
-                if self.transformer.config.guidance_embeds:
-                    guidance = torch.tensor([guidance_scale], device=device)
-                    guidance = guidance.expand(latents.shape[0])
-                else:
-                    guidance = None
-
                 noise_pred = self.transformer(
                     hidden_states=latents,
                     # YiYi notes: divide it by 1000 for now because we scale it by 1000 in the transformer model (we should not keep it but I want to keep the inputs same for the model for testing)
...
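
For reference, a usage sketch of the code path this commit touches; the model id and argument values are assumptions for illustration, not part of this commit:

```python
import torch
from diffusers import FluxPipeline

# Model id and parameter values are illustrative assumptions.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")

# guidance_scale is the scalar that becomes the per-batch `guidance`
# tensor, now created once before the denoising loop in the diff above.
image = pipe(
    "a tiny astronaut hatching from an egg on the moon",
    guidance_scale=3.5,
    num_inference_steps=28,
).images[0]
image.save("flux_out.png")
```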