Unverified commit 485b8bb0, authored by YiYi Xu and committed by GitHub

refactor `get_timesteps` for SDXL img2img + add set_begin_index (#9375)

* refactor + add begin_index

* add kolors img2img to doc
parent d08ad658
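
For orientation, here is a minimal sketch of how the refactored `get_timesteps` chooses its starting point in the two branches touched below; the numbers are illustrative and the snippet only mirrors the arithmetic visible in the diff, not the full pipeline logic.

```python
# Illustrative arithmetic only; the values are made up and the variable names
# mirror the ones used in get_timesteps below.
num_inference_steps = 10
scheduler_order = 1

# Branch 1: denoising_start is None, so `strength` decides how much of the
# schedule is kept for denoising.
strength = 0.3
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)  # 3
t_start = max(num_inference_steps - init_timestep, 0)                          # 7
# The pipeline slices scheduler.timesteps[t_start * order :] and, when the
# scheduler supports it, calls scheduler.set_begin_index(t_start * order) so the
# scheduler's internal step index starts at the same offset.
print(t_start * scheduler_order)  # 7

# Branch 2: denoising_start is set, so strength is ignored and a timestep cutoff
# is derived from the training schedule instead.
num_train_timesteps = 1000
denoising_start = 0.7
discrete_timestep_cutoff = int(round(num_train_timesteps - denoising_start * num_train_timesteps))
print(discrete_timestep_cutoff)  # 300; only timesteps below this value are kept, and
# t_start is recomputed from the full schedule length before set_begin_index(t_start)
```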
@@ -105,3 +105,11 @@ image.save("kolors_ipa_sample.png")
 
 - all
 - __call__
+
+## KolorsImg2ImgPipeline
+
+[[autodoc]] KolorsImg2ImgPipeline
+
+- all
+- __call__
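
The doc entry above only lists the autodoc targets; a usage sketch in the spirit of the existing Kolors examples might look like the following (the checkpoint id, dtype/variant flags, and file names are assumptions for illustration, not taken from this commit).

```python
import torch
from diffusers import KolorsImg2ImgPipeline
from diffusers.utils import load_image

# Checkpoint id and image paths are assumed for illustration.
pipe = KolorsImg2ImgPipeline.from_pretrained(
    "Kwai-Kolors/Kolors-diffusers", torch_dtype=torch.float16, variant="fp16"
).to("cuda")

init_image = load_image("kolors_input.png")
image = pipe(
    prompt="a watercolor painting of a lighthouse at dusk",
    image=init_image,
    strength=0.6,            # fraction of the schedule that is re-denoised
    num_inference_steps=50,  # trimmed by get_timesteps according to strength
).images[0]
image.save("kolors_img2img_sample.png")
```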
@@ -1024,14 +1024,16 @@ class StableDiffusionXLControlNetInpaintPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -1039,7 +1041,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -1050,11 +1052,12 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     def _get_add_time_ids(
         self,
         original_size,
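The `hasattr(self.scheduler, "set_begin_index")` guard is there because not every scheduler exposes the method. As a rough sketch of the idea (a toy class, not diffusers' scheduler code), the call tells the scheduler at which position of its `timesteps` array denoising will start, so it does not have to infer that index later:

```python
# Toy illustration of the begin-index handshake between pipeline and scheduler;
# this is not diffusers code, just the shape of the interaction.
class ToyScheduler:
    def __init__(self, timesteps):
        self.timesteps = timesteps
        self.order = 1
        self._begin_index = None  # unknown until the pipeline tells us

    def set_begin_index(self, begin_index=0):
        # Remember where denoising starts so later step()/add_noise() calls can
        # index into sigmas/timesteps directly instead of searching by value.
        self._begin_index = begin_index


scheduler = ToyScheduler(timesteps=list(range(999, -1, -100)))  # 999, 899, ..., 99
t_start = 7
timesteps = scheduler.timesteps[t_start * scheduler.order :]
if hasattr(scheduler, "set_begin_index"):
    scheduler.set_begin_index(t_start * scheduler.order)
print(timesteps, scheduler._begin_index)  # [299, 199, 99] 7
```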
@@ -564,14 +564,16 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -579,7 +581,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -590,11 +592,12 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline.prepare_latents
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
@@ -648,14 +648,16 @@ class StableDiffusionXLPAGImg2ImgPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -663,7 +665,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -674,11 +676,12 @@ class StableDiffusionXLPAGImg2ImgPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline.prepare_latents
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
@@ -897,14 +897,16 @@ class StableDiffusionXLPAGInpaintPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -912,7 +914,7 @@ class StableDiffusionXLPAGInpaintPipeline(
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -923,11 +925,12 @@ class StableDiffusionXLPAGInpaintPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline._get_add_time_ids
     def _get_add_time_ids(
         self,
@@ -640,14 +640,16 @@ class StableDiffusionXLImg2ImgPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -655,7 +657,7 @@ class StableDiffusionXLImg2ImgPipeline(
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -666,11 +668,12 @@ class StableDiffusionXLImg2ImgPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
     ):
@@ -901,14 +901,16 @@ class StableDiffusionXLInpaintPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -916,7 +918,7 @@ class StableDiffusionXLInpaintPipeline(
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
            if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -927,11 +929,12 @@ class StableDiffusionXLInpaintPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline._get_add_time_ids
     def _get_add_time_ids(
         self,