Commit 0e13d329 authored by anton-l's avatar anton-l
Browse files

Merge remote-tracking branch 'origin/main'

# Conflicts:
#	tests/test_modeling_utils.py
parents 3f9e3d8a e13ee8b5
......@@ -420,7 +420,7 @@ class TextEncoder(ModelMixin, ConfigMixin):
return mu, logw, x_mask
class GradTTS(DiffusionPipeline):
class GradTTSPipeline(DiffusionPipeline):
def __init__(self, unet, text_encoder, noise_scheduler, tokenizer):
super().__init__()
noise_scheduler = noise_scheduler.set_format("pt")
......@@ -430,7 +430,14 @@ class GradTTS(DiffusionPipeline):
@torch.no_grad()
def __call__(
self, text, num_inference_steps=50, temperature=1.3, length_scale=0.91, speaker_id=15, torch_device=None
self,
text,
num_inference_steps=50,
temperature=1.3,
length_scale=0.91,
speaker_id=15,
torch_device=None,
generator=None,
):
if torch_device is None:
torch_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
......@@ -464,17 +471,19 @@ class GradTTS(DiffusionPipeline):
mu_y = mu_y.transpose(1, 2)
# Sample latent representation from terminal distribution N(mu_y, I)
z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
z = mu_y + torch.randn(mu_y.shape, generator=generator).to(mu_y.device)
xt = z * y_mask
h = 1.0 / num_inference_steps
# (Patrick: TODO)
for t in tqdm.tqdm(range(num_inference_steps), total=num_inference_steps):
t_new = num_inference_steps - t - 1
t = (1.0 - (t + 0.5) * h) * torch.ones(z.shape[0], dtype=z.dtype, device=z.device)
time = t.unsqueeze(-1).unsqueeze(-1)
residual = self.unet(xt, t, mu_y, y_mask, speaker_id)
xt = self.noise_scheduler.step(xt, residual, mu_y, h, time)
scheduler_residual = residual - mu_y + xt
xt = self.noise_scheduler.step(scheduler_residual, xt, t_new, num_inference_steps)
xt = xt * y_mask
return xt[:, :, :y_max_length]
......@@ -21,7 +21,7 @@ import tqdm
from ..pipeline_utils import DiffusionPipeline
class PNDM(DiffusionPipeline):
class PNDMPipeline(DiffusionPipeline):
def __init__(self, unet, noise_scheduler):
super().__init__()
noise_scheduler = noise_scheduler.set_format("pt")
......
#!/usr/bin/env python3
import torch
from diffusers import DiffusionPipeline
# TODO(Patrick, Anton, Suraj) - rename `x` to better variable names
class ScoreSdeVePipeline(DiffusionPipeline):
    """Sampling pipeline that alternates corrector and predictor scheduler steps.

    For every scheduler timestep the pipeline first runs ``n_steps`` corrector
    updates (``scheduler.step_correct``) and then a single predictor update
    (``scheduler.step_pred``), feeding the model the sigma of that timestep.
    """

    def __init__(self, model, scheduler):
        super().__init__()
        self.register_modules(model=model, scheduler=scheduler)

    def __call__(self, num_inference_steps=2000, generator=None):
        """Run the sampling loop and return the last predictor mean.

        Args:
            num_inference_steps: Number of timesteps/sigmas the scheduler is
                asked to create.
            generator: Optional ``torch.Generator`` used for the initial noise.
                The noise is drawn before being moved to the target device.
                The scheduler steps draw their own noise and are not
                controlled by this generator.

        Returns:
            The ``x_mean`` returned by the final ``scheduler.step_pred`` call.
        """
        device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

        img_size = self.model.config.image_size
        channels = self.model.config.num_channels
        shape = (1, channels, img_size, img_size)

        model = self.model.to(device)

        # TODO(Patrick) move to scheduler config
        n_steps = 1

        # Honor the caller-supplied generator; with generator=None this is the
        # same draw from the global RNG as before.
        x = torch.randn(*shape, generator=generator) * self.scheduler.config.sigma_max
        x = x.to(device)

        self.scheduler.set_timesteps(num_inference_steps)
        self.scheduler.set_sigmas(num_inference_steps)

        for i, t in enumerate(self.scheduler.timesteps):
            sigma_t = self.scheduler.sigmas[i] * torch.ones(shape[0], device=device)

            # Corrector: refine the current sample at this noise level.
            for _ in range(n_steps):
                with torch.no_grad():
                    # Use the device-moved handle consistently for every forward pass.
                    result = model(x, sigma_t)
                x = self.scheduler.step_correct(result, x)

            # Predictor: move the sample to the next noise level.
            with torch.no_grad():
                result = model(x, sigma_t)

            x, x_mean = self.scheduler.step_pred(result, x, t)

        return x_mean
#!/usr/bin/env python3
import torch
from diffusers import DiffusionPipeline
# TODO(Patrick, Anton, Suraj) - rename `x` to better variable names
class ScoreSdeVpPipeline(DiffusionPipeline):
    """Sampling pipeline that applies one scheduler predictor step per timestep."""

    def __init__(self, model, scheduler):
        super().__init__()
        self.register_modules(model=model, scheduler=scheduler)

    def __call__(self, num_inference_steps=1000, generator=None):
        """Run the sampling loop and return the rescaled final predictor mean.

        Args:
            num_inference_steps: Number of timesteps the scheduler is asked to
                create; also used to scale the timestep passed to the model.
            generator: Optional ``torch.Generator`` used for the initial noise.
                The noise is drawn before being moved to the target device.
                The scheduler step draws its own noise and is not controlled
                by this generator.

        Returns:
            The ``x_mean`` of the last ``scheduler.step_pred`` call, mapped
            through ``(x_mean + 1.0) / 2.0``.
        """
        device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

        img_size = self.model.config.image_size
        channels = self.model.config.num_channels
        shape = (1, channels, img_size, img_size)

        model = self.model.to(device)

        # Honor the caller-supplied generator; with generator=None this is the
        # same draw from the global RNG as before.
        x = torch.randn(*shape, generator=generator).to(device)

        self.scheduler.set_timesteps(num_inference_steps)

        for t in self.scheduler.timesteps:
            # Broadcast the scalar timestep to one entry per batch element.
            t = t * torch.ones(shape[0], device=device)
            # The model receives the timestep scaled by (num_inference_steps - 1),
            # while the scheduler receives the unscaled value.
            scaled_t = t * (num_inference_steps - 1)

            with torch.no_grad():
                result = model(x, scaled_t)

            x, x_mean = self.scheduler.step_pred(result, x, t)

        x_mean = (x_mean + 1.0) / 2.0

        return x_mean
......@@ -20,4 +20,6 @@ from .scheduling_ddim import DDIMScheduler
from .scheduling_ddpm import DDPMScheduler
from .scheduling_grad_tts import GradTTSScheduler
from .scheduling_pndm import PNDMScheduler
from .scheduling_sde_ve import ScoreSdeVeScheduler
from .scheduling_sde_vp import ScoreSdeVpScheduler
from .scheduling_utils import SchedulerMixin
......@@ -92,9 +92,9 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
alpha_prod_t = self.alphas_cumprod[t]
alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one
# For t > 0, compute predicted variance βt (see formala (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf)
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf)
# and sample from it to get previous sample
# x_{t-1} ~ N(pred_prev_sample, variance) == add variane to pred_sample
# x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t]
if variance_type is None:
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from ..configuration_utils import ConfigMixin
from .scheduling_utils import SchedulerMixin
......@@ -19,29 +21,34 @@ from .scheduling_utils import SchedulerMixin
class GradTTSScheduler(SchedulerMixin, ConfigMixin):
def __init__(
self,
timesteps=1000,
beta_start=0.0001,
beta_end=0.02,
beta_start=0.05,
beta_end=20,
tensor_format="np",
):
super().__init__()
self.register_to_config(
timesteps=timesteps,
beta_start=beta_start,
beta_end=beta_end,
)
self.set_format(tensor_format=tensor_format)
self.betas = None
def get_timesteps(self, num_inference_steps):
return np.array([(t + 0.5) / num_inference_steps for t in range(num_inference_steps)])
def set_betas(self, num_inference_steps):
timesteps = self.get_timesteps(num_inference_steps)
self.betas = np.array([self.beta_start + (self.beta_end - self.beta_start) * t for t in timesteps])
def step(self, residual, sample, t, num_inference_steps):
# This is a VE scheduler from https://arxiv.org/pdf/2011.13456.pdf (see Algorithm 2 in Appendix)
if self.betas is None:
self.set_betas(num_inference_steps)
def sample_noise(self, timestep):
noise = self.beta_start + (self.beta_end - self.beta_start) * timestep
return noise
beta_t = self.betas[t]
beta_t_deriv = beta_t / num_inference_steps
def step(self, xt, residual, mu, h, timestep):
noise_t = self.sample_noise(timestep)
dxt = 0.5 * (mu - xt - residual)
dxt = dxt * noise_t * h
xt = xt - dxt
return xt
sample_deriv = residual * beta_t_deriv / 2
def __len__(self):
return len(self.config.timesteps)
sample = sample + sample_deriv
return sample
# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch
# TODO(Patrick, Anton, Suraj) - make scheduler framework independent and clean-up a bit
import numpy as np
import torch
from ..configuration_utils import ConfigMixin
from .scheduling_utils import SchedulerMixin
class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
    """Scheduler providing predictor and corrector updates over a sigma schedule.

    ``set_timesteps`` / ``set_sigmas`` must be called before ``step_pred``,
    which reads ``self.timesteps`` and ``self.discrete_sigmas``.

    Args:
        snr: Factor used by ``step_correct`` to derive its step size.
        sigma_min: Lower end of the sigma schedule.
        sigma_max: Upper end of the sigma schedule.
        sampling_eps: Final value of the timestep grid built by ``set_timesteps``.
        tensor_format: Accepted for signature compatibility; not used inside
            this class.
    """

    def __init__(self, snr=0.15, sigma_min=0.01, sigma_max=1348, sampling_eps=1e-5, tensor_format="np"):
        super().__init__()
        self.register_to_config(
            snr=snr,
            sigma_min=sigma_min,
            sigma_max=sigma_max,
            sampling_eps=sampling_eps,
        )

        self.sigmas = None
        self.discrete_sigmas = None
        self.timesteps = None

    def set_timesteps(self, num_inference_steps):
        """Create ``num_inference_steps`` timesteps running from 1 to ``sampling_eps``."""
        self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps)

    def set_sigmas(self, num_inference_steps):
        """Build ``discrete_sigmas`` (log-spaced table) and ``sigmas`` (one per timestep).

        Timesteps are created first if they have not been set yet.
        """
        if self.timesteps is None:
            self.set_timesteps(num_inference_steps)

        self.discrete_sigmas = torch.exp(
            torch.linspace(np.log(self.config.sigma_min), np.log(self.config.sigma_max), num_inference_steps)
        )
        # Read both bounds through self.config like the rest of this class,
        # instead of a `sigma_min` instance attribute that this class never
        # assigns itself.
        self.sigmas = torch.tensor(
            [self.config.sigma_min * (self.config.sigma_max / self.config.sigma_min) ** t for t in self.timesteps]
        )

    def step_pred(self, result, x, t):
        """Predictor update.

        Args:
            result: Model output for the current sample.
            x: Current sample; indexed as a 4-d tensor with batch first.
            t: Current timestep value taken from ``self.timesteps``.

        Returns:
            Tuple ``(x, x_mean)``: the updated sample with fresh noise added and
            the same update without that noise.
        """
        # TODO(Patrick) better comments + non-PyTorch
        t = t * torch.ones(x.shape[0], device=x.device)
        timestep = (t * (len(self.timesteps) - 1)).long()

        # Move the table to the sample's device once, *before* indexing, so both
        # lookups index a tensor that lives on the same device as `timestep`.
        discrete_sigmas = self.discrete_sigmas.to(t.device)
        sigma = discrete_sigmas[timestep]
        # For timestep == 0 the lookup at index -1 wraps around to the last
        # entry; torch.where discards that value and uses zero instead.
        adjacent_sigma = torch.where(timestep == 0, torch.zeros_like(t), discrete_sigmas[timestep - 1])

        f = torch.zeros_like(x)
        G = torch.sqrt(sigma**2 - adjacent_sigma**2)

        f = f - G[:, None, None, None] ** 2 * result

        z = torch.randn_like(x)
        x_mean = x - f
        x = x_mean + G[:, None, None, None] * z
        return x, x_mean

    def step_correct(self, result, x):
        """Corrector update.

        The step size is derived from the ratio of the mean per-sample noise
        norm to the mean per-sample norm of ``result``, scaled by ``snr``.

        Returns:
            The corrected sample (with noise added).
        """
        # TODO(Patrick) better comments + non-PyTorch
        noise = torch.randn_like(x)
        grad_norm = torch.norm(result.reshape(result.shape[0], -1), dim=-1).mean()
        noise_norm = torch.norm(noise.reshape(noise.shape[0], -1), dim=-1).mean()
        step_size = (self.config.snr * noise_norm / grad_norm) ** 2 * 2
        # Broadcast the scalar step size to one entry per batch element.
        step_size = step_size * torch.ones(x.shape[0], device=x.device)
        x_mean = x + step_size[:, None, None, None] * result

        x = x_mean + torch.sqrt(step_size * 2)[:, None, None, None] * noise

        return x
# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch
# TODO(Patrick, Anton, Suraj) - make scheduler framework independent and clean-up a bit
import numpy as np
import torch
from ..configuration_utils import ConfigMixin
from .scheduling_utils import SchedulerMixin
class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
    """Scheduler exposing a single predictor step driven by a linear beta schedule.

    ``set_timesteps`` must be called before ``step_pred``, which uses the
    number of timesteps to derive its step width.
    """

    def __init__(self, beta_min=0.1, beta_max=20, sampling_eps=1e-3, tensor_format="np"):
        super().__init__()
        self.register_to_config(beta_min=beta_min, beta_max=beta_max, sampling_eps=sampling_eps)

        self.timesteps = None
        self.discrete_sigmas = None
        self.sigmas = None

    def set_timesteps(self, num_inference_steps):
        """Create ``num_inference_steps`` timesteps running from 1 to ``sampling_eps``."""
        self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps)

    def step_pred(self, result, x, t):
        """Apply one predictor update.

        Args:
            result: Model output for the current sample.
            x: Current sample; broadcast against per-batch 1-d coefficients
                expanded to four dimensions.
            t: 1-d tensor holding the current timestep per batch element.

        Returns:
            Tuple ``(x, x_mean)``: the updated sample with noise added and the
            update without noise.
        """
        # TODO(Patrick) better comments + non-PyTorch
        beta_min = self.config.beta_min
        beta_span = self.config.beta_max - beta_min

        # Rescale the model output by the negative inverse of `std`.
        log_mean_coeff = -0.25 * t**2 * beta_span - 0.5 * t * beta_min
        std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff))
        score = -result / std[:, None, None, None]

        # Negative step width: one step per configured timestep.
        dt = -1.0 / len(self.timesteps)

        beta_t = beta_min + t * beta_span
        g = torch.sqrt(beta_t)[:, None, None, None]

        drift = -0.5 * beta_t[:, None, None, None] * x
        drift = drift - g**2 * score
        x_mean = x + drift * dt

        # Add freshly drawn noise scaled by the diffusion coefficient.
        z = torch.randn_like(x)
        noisy = x_mean + g * np.sqrt(-dt) * z

        return noisy, x_mean
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import tempfile
import unittest
import numpy as np
import torch
from diffusers.models.embeddings import get_timestep_embedding
from diffusers.testing_utils import floats_tensor, slow, torch_device
torch.backends.cuda.matmul.allow_tf32 = False
class EmbeddingsTests(unittest.TestCase):
    """Checks on the values returned by ``get_timestep_embedding``."""

    def test_timestep_embeddings(self):
        """Structural properties of the default embedding for timesteps 0..15."""
        embedding_dim = 256
        timesteps = torch.arange(16)

        t1 = get_timestep_embedding(timesteps, embedding_dim)

        # first vector should always be composed only of 0's and 1's
        assert (t1[0, : embedding_dim // 2] - 0).abs().sum() < 1e-5
        assert (t1[0, embedding_dim // 2 :] - 1).abs().sum() < 1e-5

        # last element of each vector should be one
        assert (t1[:, -1] - 1).abs().sum() < 1e-5

        # For large embeddings (e.g. 128) the frequency of every vector is higher
        # than the previous one which means that the gradients of later vectors are
        # ALWAYS higher than the previous ones
        grad_mean = np.abs(np.gradient(t1, axis=-1)).mean(axis=1)

        prev_grad = 0.0
        for grad in grad_mean:
            assert grad > prev_grad
            prev_grad = grad

    def test_timestep_defaults(self):
        """Calling with no keyword arguments matches the explicitly spelled-out values."""
        embedding_dim = 16
        timesteps = torch.arange(10)

        t1 = get_timestep_embedding(timesteps, embedding_dim)
        t2 = get_timestep_embedding(
            timesteps, embedding_dim, flip_sin_to_cos=False, downscale_freq_shift=1, max_period=10_000
        )

        assert torch.allclose(t1.cpu(), t2.cpu(), 1e-3)

    def test_timestep_flip_sin_cos(self):
        """``flip_sin_to_cos=True`` equals the unflipped embedding with its halves swapped."""
        embedding_dim = 16
        timesteps = torch.arange(10)

        t1 = get_timestep_embedding(timesteps, embedding_dim, flip_sin_to_cos=True)
        t1 = torch.cat([t1[:, embedding_dim // 2 :], t1[:, : embedding_dim // 2]], dim=-1)

        t2 = get_timestep_embedding(timesteps, embedding_dim, flip_sin_to_cos=False)

        assert torch.allclose(t1.cpu(), t2.cpu(), 1e-3)

    def test_timestep_downscale_freq_shift(self):
        """The second (cosine) half with shift 0 never exceeds the one with shift 1."""
        embedding_dim = 16
        timesteps = torch.arange(10)

        t1 = get_timestep_embedding(timesteps, embedding_dim, downscale_freq_shift=0)
        t2 = get_timestep_embedding(timesteps, embedding_dim, downscale_freq_shift=1)

        # get cosine half (vectors that are wrapped into cosine)
        cosine_half = (t1 - t2)[:, embedding_dim // 2 :]

        # cosine needs to be negative
        # Check every entry directly: summing `mask - 1` can never be positive,
        # so an assertion built on that sum would pass for any input. A small
        # tolerance absorbs float rounding on entries that should be exactly 0.
        assert (cosine_half <= 1e-5).all().item()

    def test_sinoid_embeddings_hardcoded(self):
        """Pin a 3x3 slice of the embedding for three argument combinations."""
        embedding_dim = 64
        timesteps = torch.arange(128)

        # standard unet, score_vde
        t1 = get_timestep_embedding(timesteps, embedding_dim, downscale_freq_shift=1, flip_sin_to_cos=False)
        # glide, ldm
        t2 = get_timestep_embedding(timesteps, embedding_dim, downscale_freq_shift=0, flip_sin_to_cos=True)
        # grad-tts
        t3 = get_timestep_embedding(timesteps, embedding_dim, scale=1000)

        assert torch.allclose(
            t1[23:26, 47:50].flatten().cpu(),
            torch.tensor([0.9646, 0.9804, 0.9892, 0.9615, 0.9787, 0.9882, 0.9582, 0.9769, 0.9872]),
            1e-3,
        )
        assert torch.allclose(
            t2[23:26, 47:50].flatten().cpu(),
            torch.tensor([0.3019, 0.2280, 0.1716, 0.3146, 0.2377, 0.1790, 0.3272, 0.2474, 0.1864]),
            1e-3,
        )
        assert torch.allclose(
            t3[23:26, 47:50].flatten().cpu(),
            torch.tensor([-0.9801, -0.9464, -0.9349, -0.3952, 0.8887, -0.9709, 0.5299, -0.2853, -0.9927]),
            1e-3,
        )
......@@ -22,18 +22,24 @@ import numpy as np
import torch
from diffusers import (
BDDM,
DDIM,
DDPM,
PNDM,
BDDMPipeline,
DDIMPipeline,
DDIMScheduler,
DDPMPipeline,
DDPMScheduler,
Glide,
GlidePipeline,
GlideSuperResUNetModel,
GlideTextToImageUNetModel,
GradTTS,
LatentDiffusion,
GradTTSPipeline,
GradTTSScheduler,
LatentDiffusionPipeline,
NCSNpp,
PNDMPipeline,
PNDMScheduler,
ScoreSdeVePipeline,
ScoreSdeVeScheduler,
ScoreSdeVpPipeline,
ScoreSdeVpScheduler,
UNetGradTTSModel,
UNetLDMModel,
UNetModel,
......@@ -107,7 +113,7 @@ class ModelTesterMixin:
new_image = new_model(**inputs_dict)
max_diff = (image - new_image).abs().sum().item()
self.assertLessEqual(max_diff, 1e-5, "Models give different forward passes")
self.assertLessEqual(max_diff, 5e-5, "Models give different forward passes")
def test_determinism(self):
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
......@@ -425,11 +431,12 @@ class GlideTextToImageUNetModelTests(ModelTesterMixin, unittest.TestCase):
emb = torch.randn((1, 16, model.config.transformer_dim)).to(torch_device)
time_step = torch.tensor([10] * noise.shape[0], device=torch_device)
model.to(torch_device)
with torch.no_grad():
output = model(noise, time_step, emb)
output, _ = torch.split(output, 3, dim=1)
output_slice = output[0, -1, -3:, -3:].flatten()
output_slice = output[0, -1, -3:, -3:].cpu().flatten()
# fmt: off
expected_output_slice = torch.tensor([2.7766, -10.3558, -14.9149, -0.9376, -14.9175, -17.7679, -5.5565, -12.9521, -12.9845])
# fmt: on
......@@ -583,11 +590,11 @@ class PipelineTesterMixin(unittest.TestCase):
model = UNetModel(ch=32, ch_mult=(1, 2), num_res_blocks=2, attn_resolutions=(16,), resolution=32)
schedular = DDPMScheduler(timesteps=10)
ddpm = DDPM(model, schedular)
ddpm = DDPMPipeline(model, schedular)
with tempfile.TemporaryDirectory() as tmpdirname:
ddpm.save_pretrained(tmpdirname)
new_ddpm = DDPM.from_pretrained(tmpdirname)
new_ddpm = DDPMPipeline.from_pretrained(tmpdirname)
generator = torch.manual_seed(0)
......@@ -601,7 +608,7 @@ class PipelineTesterMixin(unittest.TestCase):
def test_from_pretrained_hub(self):
model_path = "fusing/ddpm-cifar10"
ddpm = DDPM.from_pretrained(model_path)
ddpm = DDPMPipeline.from_pretrained(model_path)
ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path)
ddpm.noise_scheduler.num_timesteps = 10
......@@ -624,7 +631,7 @@ class PipelineTesterMixin(unittest.TestCase):
noise_scheduler = DDPMScheduler.from_config(model_id)
noise_scheduler = noise_scheduler.set_format("pt")
ddpm = DDPM(unet=unet, noise_scheduler=noise_scheduler)
ddpm = DDPMPipeline(unet=unet, noise_scheduler=noise_scheduler)
image = ddpm(generator=generator)
image_slice = image[0, -1, -3:, -3:].cpu()
......@@ -641,7 +648,7 @@ class PipelineTesterMixin(unittest.TestCase):
unet = UNetModel.from_pretrained(model_id)
noise_scheduler = DDIMScheduler(tensor_format="pt")
ddim = DDIM(unet=unet, noise_scheduler=noise_scheduler)
ddim = DDIMPipeline(unet=unet, noise_scheduler=noise_scheduler)
image = ddim(generator=generator, eta=0.0)
image_slice = image[0, -1, -3:, -3:].cpu()
......@@ -660,7 +667,7 @@ class PipelineTesterMixin(unittest.TestCase):
unet = UNetModel.from_pretrained(model_id)
noise_scheduler = PNDMScheduler(tensor_format="pt")
pndm = PNDM(unet=unet, noise_scheduler=noise_scheduler)
pndm = PNDMPipeline(unet=unet, noise_scheduler=noise_scheduler)
image = pndm(generator=generator)
image_slice = image[0, -1, -3:, -3:].cpu()
......@@ -672,9 +679,10 @@ class PipelineTesterMixin(unittest.TestCase):
assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2
@slow
@unittest.skip("Skipping for now as it takes too long")
def test_ldm_text2img(self):
model_id = "fusing/latent-diffusion-text2im-large"
ldm = LatentDiffusion.from_pretrained(model_id)
ldm = LatentDiffusionPipeline.from_pretrained(model_id)
prompt = "A painting of a squirrel eating a burger"
generator = torch.manual_seed(0)
......@@ -686,10 +694,25 @@ class PipelineTesterMixin(unittest.TestCase):
expected_slice = torch.tensor([0.7295, 0.7358, 0.7256, 0.7435, 0.7095, 0.6884, 0.7325, 0.6921, 0.6458])
assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2
@slow
def test_ldm_text2img_fast(self):
model_id = "fusing/latent-diffusion-text2im-large"
ldm = LatentDiffusionPipeline.from_pretrained(model_id)
prompt = "A painting of a squirrel eating a burger"
generator = torch.manual_seed(0)
image = ldm([prompt], generator=generator, num_inference_steps=1)
image_slice = image[0, -1, -3:, -3:].cpu()
assert image.shape == (1, 3, 256, 256)
expected_slice = torch.tensor([0.3163, 0.8670, 0.6465, 0.1865, 0.6291, 0.5139, 0.2824, 0.3723, 0.4344])
assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2
@slow
def test_glide_text2img(self):
model_id = "fusing/glide-base"
glide = Glide.from_pretrained(model_id)
glide = GlidePipeline.from_pretrained(model_id)
prompt = "a pencil sketch of a corgi"
generator = torch.manual_seed(0)
......@@ -704,22 +727,61 @@ class PipelineTesterMixin(unittest.TestCase):
@slow
def test_grad_tts(self):
model_id = "fusing/grad-tts-libri-tts"
grad_tts = GradTTS.from_pretrained(model_id)
grad_tts = GradTTSPipeline.from_pretrained(model_id)
noise_scheduler = GradTTSScheduler()
grad_tts.noise_scheduler = noise_scheduler
text = "Hello world, I missed you so much."
generator = torch.manual_seed(0)
# generate mel spectrograms using text
mel_spec = grad_tts(text)
mel_spec = grad_tts(text, generator=generator)
assert mel_spec.shape == (1, 256, 256, 3)
expected_slice = torch.tensor([0.7119, 0.7073, 0.6460, 0.7780, 0.7423, 0.6926, 0.7378, 0.7189, 0.7784])
assert (mel_spec.flatten() - expected_slice).abs().max() < 1e-2
assert mel_spec.shape == (1, 80, 143)
expected_slice = torch.tensor(
[-6.7584, -6.8347, -6.3293, -6.6437, -6.7233, -6.4684, -6.1187, -6.3172, -6.6890]
)
assert (mel_spec[0, :3, :3].cpu().flatten() - expected_slice).abs().max() < 1e-2
@slow
def test_score_sde_ve_pipeline(self):
torch.manual_seed(0)
model = NCSNpp.from_pretrained("fusing/ffhq_ncsnpp")
scheduler = ScoreSdeVeScheduler.from_config("fusing/ffhq_ncsnpp")
sde_ve = ScoreSdeVePipeline(model=model, scheduler=scheduler)
image = sde_ve(num_inference_steps=2)
expected_image_sum = 3382810112.0
expected_image_mean = 1075.366455078125
assert (image.abs().sum() - expected_image_sum).abs().cpu().item() < 1e-2
assert (image.abs().mean() - expected_image_mean).abs().cpu().item() < 1e-4
@slow
def test_score_sde_vp_pipeline(self):
model = NCSNpp.from_pretrained("fusing/cifar10-ddpmpp-vp")
scheduler = ScoreSdeVpScheduler.from_config("fusing/cifar10-ddpmpp-vp")
sde_vp = ScoreSdeVpPipeline(model=model, scheduler=scheduler)
torch.manual_seed(0)
image = sde_vp(num_inference_steps=10)
expected_image_sum = 4183.2012
expected_image_mean = 1.3617
assert (image.abs().sum() - expected_image_sum).abs().cpu().item() < 1e-2
assert (image.abs().mean() - expected_image_mean).abs().cpu().item() < 1e-4
def test_module_from_pipeline(self):
model = DiffWave(num_res_layers=4)
noise_scheduler = DDPMScheduler(timesteps=12)
bddm = BDDM(model, noise_scheduler)
bddm = BDDMPipeline(model, noise_scheduler)
# check if the library name for the diffwave module is set to pipeline module
self.assertTrue(bddm.config["diffwave"][0] == "pipeline_bddm")
......@@ -727,6 +789,6 @@ class PipelineTesterMixin(unittest.TestCase):
# check if we can save and load the pipeline
with tempfile.TemporaryDirectory() as tmpdirname:
bddm.save_pretrained(tmpdirname)
_ = BDDM.from_pretrained(tmpdirname)
_ = BDDMPipeline.from_pretrained(tmpdirname)
# check if the same works using the DiffusionPipeline class
_ = DiffusionPipeline.from_pretrained(tmpdirname)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment