Merge remote-tracking branch 'origin/main'

# Conflicts: # src/diffusers/__init__.py # src/diffusers/schedulers/__init__.py # src/diffusers/schedulers/glide_ddim.py

Merge remote-tracking branch 'origin/main'
# Conflicts: # src/diffusers/__init__.py # src/diffusers/schedulers/__init__.py # src/diffusers/schedulers/glide_ddim.py
9c530191 · anton-l · ca2635d9 · f28cb9e1 · 9c530191 · 9c530191
Commit 9c530191 authored Jun 13, 2022 by anton-l
12 changed files
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Both models and schedulers should be load- and saveable from the Hub.

 ```python
 import torch
-from diffusers import UNetModel, GaussianDDPMScheduler
+from diffusers import UNetModel, DDPMScheduler
 import PIL
 import numpy as np
 import tqdm
@@ -44,7 +44,7 @@ generator = torch.manual_seed(0)
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"

 # 1. Load models
-noise_scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church", tensor_format="pt")
+noise_scheduler = DDPMScheduler.from_config("fusing/ddpm-lsun-church", tensor_format="pt")
 unet = UNetModel.from_pretrained("fusing/ddpm-lsun-church").to(torch_device)

 # 2. Sample gaussian noise

--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -10,7 +10,9 @@ from .models.unet_glide import GLIDESuperResUNetModel, GLIDETextToImageUNetModel
 from .models.unet_ldm import UNetLDMModel
 from .pipeline_utils import DiffusionPipeline
 from .pipelines import DDIM, DDPM, GLIDE, LatentDiffusion
-from .schedulers import SchedulerMixin
+
+from .schedulers import SchedulerMixin, DDIMScheduler, DDPMScheduler
+
 from .schedulers.classifier_free_guidance import ClassifierFreeGuidanceScheduler
 from .schedulers.ddim import DDIMScheduler
 from .schedulers.gaussian_ddpm import GaussianDDPMScheduler
--- a/src/diffusers/pipelines/old/ddim/run_ddpm.py
+++ b/src/diffusers/pipelines/old/ddim/run_ddpm.py
 #!/usr/bin/env python3
 import torch

-from diffusers import GaussianDDPMScheduler, UNetModel
+from diffusers import DDPMScheduler, UNetModel


 model = UNetModel(dim=64, dim_mults=(1, 2, 4, 8))

-diffusion = GaussianDDPMScheduler(model, image_size=128, timesteps=1000, loss_type="l1")  # number of steps  # L1 or L2
+diffusion = DDPMScheduler(model, image_size=128, timesteps=1000, loss_type="l1")  # number of steps  # L1 or L2

 training_images = torch.randn(8, 3, 128, 128)  # your images need to be normalized from a range of -1 to +1
 loss = diffusion(training_images)

--- a/src/diffusers/pipelines/old/ddpm/run_ddpm.py
+++ b/src/diffusers/pipelines/old/ddpm/run_ddpm.py
 #!/usr/bin/env python3
 import torch

-from diffusers import GaussianDDPMScheduler, UNetModel
+from diffusers import DDPMScheduler, UNetModel


 model = UNetModel(dim=64, dim_mults=(1, 2, 4, 8))

-diffusion = GaussianDDPMScheduler(model, image_size=128, timesteps=1000, loss_type="l1")  # number of steps  # L1 or L2
+diffusion = DDPMScheduler(model, image_size=128, timesteps=1000, loss_type="l1")  # number of steps  # L1 or L2

 training_images = torch.randn(8, 3, 128, 128)  # your images need to be normalized from a range of -1 to +1
 loss = diffusion(training_images)

--- a/src/diffusers/schedulers/__init__.py
+++ b/src/diffusers/schedulers/__init__.py
@@ -16,7 +16,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from .scheduling_ddim import DDIMScheduler
+from .scheduling_ddpm import DDPMScheduler
+from .scheduling_utils import SchedulerMixin
+
 from .classifier_free_guidance import ClassifierFreeGuidanceScheduler
 from .ddim import DDIMScheduler
 from .gaussian_ddpm import GaussianDDPMScheduler
+from .glide_ddim import GlideDDIMScheduler
 from .schedulers_utils import SchedulerMixin
--- a/src/diffusers/schedulers/glide_ddim.py
+++ b/src/diffusers/schedulers/glide_ddim.py
--- a/src/diffusers/schedulers/ddim.py
+++ b/src/diffusers/schedulers/ddim.py
@@ -16,7 +16,7 @@ import math
 import numpy as np

 from ..configuration_utils import ConfigMixin
-from .schedulers_utils import SchedulerMixin, betas_for_alpha_bar, linear_beta_schedule
+from .scheduling_utils import SchedulerMixin, betas_for_alpha_bar, linear_beta_schedule


 class DDIMScheduler(SchedulerMixin, ConfigMixin):

--- a/src/diffusers/schedulers/gaussian_ddpm.py
+++ b/src/diffusers/schedulers/gaussian_ddpm.py
@@ -16,10 +16,10 @@ import math
 import numpy as np

 from ..configuration_utils import ConfigMixin
-from .schedulers_utils import SchedulerMixin, betas_for_alpha_bar, linear_beta_schedule
+from .scheduling_utils import SchedulerMixin, betas_for_alpha_bar, linear_beta_schedule


-class GaussianDDPMScheduler(SchedulerMixin, ConfigMixin):
+class DDPMScheduler(SchedulerMixin, ConfigMixin):
    def __init__(
        self,
        timesteps=1000,

--- a/src/diffusers/schedulers/scheduling_plms.py
+++ b/src/diffusers/schedulers/scheduling_plms.py
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+from ..configuration_utils import ConfigMixin
+from .schedulers_utils import SchedulerMixin, betas_for_alpha_bar, linear_beta_schedule
+
+
+def noise_like(shape, device, repeat=False):
+    repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
+    noise = lambda: torch.randn(shape, device=device)
+    return repeat_noise() if repeat else noise()
+
+
+def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True):
+    if ddim_discr_method == 'uniform':
+        c = num_ddpm_timesteps // num_ddim_timesteps
+        ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c)))
+    elif ddim_discr_method == 'quad':
+        ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * .8), num_ddim_timesteps)) ** 2).astype(int)
+    else:
+        raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"')
+
+    # assert ddim_timesteps.shape[0] == num_ddim_timesteps
+    # add one to get the final alpha values right (the ones from first scale to data during sampling)
+    steps_out = ddim_timesteps + 1
+    if verbose:
+        print(f'Selected timesteps for ddim sampler: {steps_out}')
+    return steps_out
+
+
+def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True):
+    # select alphas for computing the variance schedule
+    alphas = alphacums[ddim_timesteps]
+    alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist())
+
+    # according the the formula provided in https://arxiv.org/abs/2010.02502
+    sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev))
+    if verbose:
+        print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}')
+        print(f'For the chosen value of eta, which is {eta}, '
+              f'this results in the following sigma_t schedule for ddim sampler {sigmas}')
+    return sigmas, alphas, alphas_prev
+
+
+class PLMSSampler(object):
+    def __init__(self, model, schedule="linear", **kwargs):
+        super().__init__()
+        self.model = model
+        self.ddpm_num_timesteps = model.num_timesteps
+        self.schedule = schedule
+
+    def register_buffer(self, name, attr):
+        if type(attr) == torch.Tensor:
+            if attr.device != torch.device("cuda"):
+                attr = attr.to(torch.device("cuda"))
+        setattr(self, name, attr)
+
+    def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
+        if ddim_eta != 0:
+            raise ValueError('ddim_eta must be 0 for PLMS')
+        self.ddim_timesteps = make_ddim_timesteps(ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps,
+                                                  num_ddpm_timesteps=self.ddpm_num_timesteps,verbose=verbose)
+        alphas_cumprod = self.model.alphas_cumprod
+        assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, 'alphas have to be defined for each timestep'
+        to_torch = lambda x: x.clone().detach().to(torch.float32).to(self.model.device)
+
+        self.register_buffer('betas', to_torch(self.model.betas))
+        self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
+        self.register_buffer('alphas_cumprod_prev', to_torch(self.model.alphas_cumprod_prev))
+
+        # calculations for diffusion q(x_t | x_{t-1}) and others
+        self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu())))
+        self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod.cpu())))
+        self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod.cpu())))
+        self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu())))
+        self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu() - 1)))
+
+        # ddim sampling parameters
+        ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters(alphacums=alphas_cumprod.cpu(),
+                                                                                   ddim_timesteps=self.ddim_timesteps,
+                                                                                   eta=ddim_eta,verbose=verbose)
+        self.register_buffer('ddim_sigmas', ddim_sigmas)
+        self.register_buffer('ddim_alphas', ddim_alphas)
+        self.register_buffer('ddim_alphas_prev', ddim_alphas_prev)
+        self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1. - ddim_alphas))
+        sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt(
+            (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) * (
+                        1 - self.alphas_cumprod / self.alphas_cumprod_prev))
+        self.register_buffer('ddim_sigmas_for_original_num_steps', sigmas_for_original_sampling_steps)
+
+    @torch.no_grad()
+    def sample(self,
+               S,
+               batch_size,
+               shape,
+               conditioning=None,
+               callback=None,
+               normals_sequence=None,
+               img_callback=None,
+               quantize_x0=False,
+               eta=0.,
+               mask=None,
+               x0=None,
+               temperature=1.,
+               noise_dropout=0.,
+               score_corrector=None,
+               corrector_kwargs=None,
+               verbose=True,
+               x_T=None,
+               log_every_t=100,
+               unconditional_guidance_scale=1.,
+               unconditional_conditioning=None,
+               # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
+               **kwargs
+               ):
+        if conditioning is not None:
+            if isinstance(conditioning, dict):
+                cbs = conditioning[list(conditioning.keys())[0]].shape[0]
+                if cbs != batch_size:
+                    print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
+            else:
+                if conditioning.shape[0] != batch_size:
+                    print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")
+
+        self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
+        # sampling
+        C, H, W = shape
+        size = (batch_size, C, H, W)
+        print(f'Data shape for PLMS sampling is {size}')
+
+        samples, intermediates = self.plms_sampling(conditioning, size,
+                                                    callback=callback,
+                                                    img_callback=img_callback,
+                                                    quantize_denoised=quantize_x0,
+                                                    mask=mask, x0=x0,
+                                                    ddim_use_original_steps=False,
+                                                    noise_dropout=noise_dropout,
+                                                    temperature=temperature,
+                                                    score_corrector=score_corrector,
+                                                    corrector_kwargs=corrector_kwargs,
+                                                    x_T=x_T,
+                                                    log_every_t=log_every_t,
+                                                    unconditional_guidance_scale=unconditional_guidance_scale,
+                                                    unconditional_conditioning=unconditional_conditioning,
+                                                    )
+        return samples, intermediates
+
+    @torch.no_grad()
+    def plms_sampling(self, cond, shape,
+                      x_T=None, ddim_use_original_steps=False,
+                      callback=None, timesteps=None, quantize_denoised=False,
+                      mask=None, x0=None, img_callback=None, log_every_t=100,
+                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
+                      unconditional_guidance_scale=1., unconditional_conditioning=None,):
+        device = self.model.betas.device
+        b = shape[0]
+        if x_T is None:
+            img = torch.randn(shape, device=device)
+        else:
+            img = x_T
+
+        if timesteps is None:
+            timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps
+        elif timesteps is not None and not ddim_use_original_steps:
+            subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1
+            timesteps = self.ddim_timesteps[:subset_end]
+
+        intermediates = {'x_inter': [img], 'pred_x0': [img]}
+        time_range = list(reversed(range(0,timesteps))) if ddim_use_original_steps else np.flip(timesteps)
+        total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0]
+        print(f"Running PLMS Sampling with {total_steps} timesteps")
+
+        iterator = tqdm(time_range, desc='PLMS Sampler', total=total_steps)
+        old_eps = []
+
+        for i, step in enumerate(iterator):
+            index = total_steps - i - 1
+            ts = torch.full((b,), step, device=device, dtype=torch.long)
+            ts_next = torch.full((b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long)
+
+            if mask is not None:
+                assert x0 is not None
+                img_orig = self.model.q_sample(x0, ts)  # TODO: deterministic forward pass?
+                img = img_orig * mask + (1. - mask) * img
+
+            outs = self.p_sample_plms(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
+                                      quantize_denoised=quantize_denoised, temperature=temperature,
+                                      noise_dropout=noise_dropout, score_corrector=score_corrector,
+                                      corrector_kwargs=corrector_kwargs,
+                                      unconditional_guidance_scale=unconditional_guidance_scale,
+                                      unconditional_conditioning=unconditional_conditioning,
+                                      old_eps=old_eps, t_next=ts_next)
+            img, pred_x0, e_t = outs
+            old_eps.append(e_t)
+            if len(old_eps) >= 4:
+                old_eps.pop(0)
+            if callback: callback(i)
+            if img_callback: img_callback(pred_x0, i)
+
+            if index % log_every_t == 0 or index == total_steps - 1:
+                intermediates['x_inter'].append(img)
+                intermediates['pred_x0'].append(pred_x0)
+
+        return img, intermediates
+
+    @torch.no_grad()
+    def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
+                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
+                      unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None):
+        b, *_, device = *x.shape, x.device
+
+        def get_model_output(x, t):
+            if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
+                e_t = self.model.apply_model(x, t, c)
+            else:
+                x_in = torch.cat([x] * 2)
+                t_in = torch.cat([t] * 2)
+                c_in = torch.cat([unconditional_conditioning, c])
+                e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
+                e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)
+
+            if score_corrector is not None:
+                assert self.model.parameterization == "eps"
+                e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)
+
+            return e_t
+
+        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
+        alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
+        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
+        sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
+
+        def get_x_prev_and_pred_x0(e_t, index):
+            # select parameters corresponding to the currently considered timestep
+            a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
+            a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
+            sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
+            sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device)
+
+            # current prediction for x_0
+            pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
+            if quantize_denoised:
+                pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
+            # direction pointing to x_t
+            dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t
+            noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
+            if noise_dropout > 0.:
+                noise = torch.nn.functional.dropout(noise, p=noise_dropout)
+            x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
+            return x_prev, pred_x0
+
+        e_t = get_model_output(x, t)
+        if len(old_eps) == 0:
+            # Pseudo Improved Euler (2nd order)
+            x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t, index)
+            e_t_next = get_model_output(x_prev, t_next)
+            e_t_prime = (e_t + e_t_next) / 2
+        elif len(old_eps) == 1:
+            # 2nd order Pseudo Linear Multistep (Adams-Bashforth)
+            e_t_prime = (3 * e_t - old_eps[-1]) / 2
+        elif len(old_eps) == 2:
+            # 3nd order Pseudo Linear Multistep (Adams-Bashforth)
+            e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12
+        elif len(old_eps) >= 3:
+            # 4nd order Pseudo Linear Multistep (Adams-Bashforth)
+            e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24
+
+        x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t_prime, index)
+
+        return x_prev, pred_x0, e_t
--- a/src/diffusers/schedulers/schedulers_utils.py
+++ b/src/diffusers/schedulers/schedulers_utils.py
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -19,7 +19,7 @@ import unittest

 import torch

-from diffusers import DDIM, DDPM, DDIMScheduler, GaussianDDPMScheduler, LatentDiffusion, UNetModel
+from diffusers import DDIM, DDPM, DDIMScheduler, DDPMScheduler, LatentDiffusion, UNetModel
 from diffusers.configuration_utils import ConfigMixin
 from diffusers.pipeline_utils import DiffusionPipeline
 from diffusers.testing_utils import floats_tensor, slow, torch_device
@@ -107,7 +107,7 @@ class PipelineTesterMixin(unittest.TestCase):
    def test_from_pretrained_save_pretrained(self):
        # 1. Load models
        model = UNetModel(ch=32, ch_mult=(1, 2), num_res_blocks=2, attn_resolutions=(16,), resolution=32)
-        schedular = GaussianDDPMScheduler(timesteps=10)
+        schedular = DDPMScheduler(timesteps=10)

        ddpm = DDPM(model, schedular)

@@ -147,7 +147,7 @@ class PipelineTesterMixin(unittest.TestCase):
        model_id = "fusing/ddpm-cifar10"

        unet = UNetModel.from_pretrained(model_id)
-        noise_scheduler = GaussianDDPMScheduler.from_config(model_id)
+        noise_scheduler = DDPMScheduler.from_config(model_id)
        noise_scheduler = noise_scheduler.set_format("pt")

        ddpm = DDPM(unet=unet, noise_scheduler=noise_scheduler)

--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -20,7 +20,7 @@ import unittest
 import numpy as np
 import torch

-from diffusers import DDIMScheduler, GaussianDDPMScheduler
+from diffusers import DDIMScheduler, DDPMScheduler


 torch.backends.cuda.matmul.allow_tf32 = False
@@ -163,7 +163,7 @@ class SchedulerCommonTest(unittest.TestCase):


 class DDPMSchedulerTest(SchedulerCommonTest):
-    scheduler_classes = (GaussianDDPMScheduler,)
+    scheduler_classes = (DDPMScheduler,)

    def get_scheduler_config(self, **kwargs):
        config = {