fix setup

cbb19ee8 · Patrick von Platen · 2234877e · cbb19ee8 · cbb19ee8 · cbb19ee8
Commit cbb19ee8 authored Jun 09, 2022 by Patrick von Platen
20 changed files
--- a/models/vision/ddim/example.py
+++ b/models/vision/ddim/example.py
 #!/usr/bin/env python3
 import os
 import pathlib
-from modeling_ddim import DDIM
-import PIL.Image
 import numpy as np
+import PIL.Image
+from modeling_ddim import DDIM
 model_ids = ["ddim-celeba-hq", "ddim-lsun-church", "ddim-lsun-bedroom"]
 for model_id in model_ids:

--- a/models/vision/ddim/modeling_ddim.py
+++ b/models/vision/ddim/modeling_ddim.py
@@ -14,13 +14,13 @@
 # limitations under the License.
-from diffusers import DiffusionPipeline
-import tqdm
 import torch
+import tqdm
+from diffusers import DiffusionPipeline
 class DDIM(DiffusionPipeline):
    def __init__(self, unet, noise_scheduler):
        super().__init__()
        self.register_modules(unet=unet, noise_scheduler=noise_scheduler)
@@ -34,12 +34,16 @@ class DDIM(DiffusionPipeline):
        inference_step_times = range(0, num_trained_timesteps, num_trained_timesteps // num_inference_steps)
        self.unet.to(torch_device)
-        image = self.noise_scheduler.sample_noise((batch_size, self.unet.in_channels, self.unet.resolution, self.unet.resolution), device=torch_device, generator=generator)
+        image = self.noise_scheduler.sample_noise(
+            (batch_size, self.unet.in_channels, self.unet.resolution, self.unet.resolution),
+            device=torch_device,
+            generator=generator,
+        )
        for t in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps):
            # get actual t and t-1
            train_step = inference_step_times[t]
-            prev_train_step = inference_step_times[t - 1] if t > 0 else - 1
+            prev_train_step = inference_step_times[t - 1] if t > 0 else -1
            # compute alphas
            alpha_prod_t = self.noise_scheduler.get_alpha_prod(train_step)
@@ -50,8 +54,14 @@ class DDIM(DiffusionPipeline):
            beta_prod_t_prev_sqrt = (1 - alpha_prod_t_prev).sqrt()
            # compute relevant coefficients
-            coeff_1 = (alpha_prod_t_prev - alpha_prod_t).sqrt() * alpha_prod_t_prev_rsqrt * beta_prod_t_prev_sqrt / beta_prod_t_sqrt * eta
+            coeff_1 = (
-            coeff_2 = ((1 - alpha_prod_t_prev) - coeff_1 ** 2).sqrt()
+                (alpha_prod_t_prev - alpha_prod_t).sqrt()
+                * alpha_prod_t_prev_rsqrt
+                * beta_prod_t_prev_sqrt
+                / beta_prod_t_sqrt
+                * eta
+            )
+            coeff_2 = ((1 - alpha_prod_t_prev) - coeff_1**2).sqrt()
            # model forward
            with torch.no_grad():

--- a/models/vision/ddim/run_inference.py
+++ b/models/vision/ddim/run_inference.py
 #!/usr/bin/env python3
 # !pip install diffusers
-from modeling_ddim import DDIM
-import PIL.Image
 import numpy as np
+import PIL.Image
+from modeling_ddim import DDIM
 model_id = "fusing/ddpm-cifar10"
 model_id = "fusing/ddpm-lsun-bedroom"

--- a/models/vision/ddpm/example.py
+++ b/models/vision/ddpm/example.py
 #!/usr/bin/env python3
 import os
 import pathlib
-from modeling_ddpm import DDPM
-import PIL.Image
 import numpy as np
-model_ids = ["ddpm-lsun-cat", "ddpm-lsun-cat-ema", "ddpm-lsun-church-ema", "ddpm-lsun-church", "ddpm-lsun-bedroom", "ddpm-lsun-bedroom-ema", "ddpm-cifar10-ema", "ddpm-cifar10", "ddpm-celeba-hq", "ddpm-celeba-hq-ema"]
+import PIL.Image
+from modeling_ddpm import DDPM
+model_ids = [
+    "ddpm-lsun-cat",
+    "ddpm-lsun-cat-ema",
+    "ddpm-lsun-church-ema",
+    "ddpm-lsun-church",
+    "ddpm-lsun-bedroom",
+    "ddpm-lsun-bedroom-ema",
+    "ddpm-cifar10-ema",
+    "ddpm-cifar10",
+    "ddpm-celeba-hq",
+    "ddpm-celeba-hq-ema",
+]
 for model_id in model_ids:
    path = os.path.join("/home/patrick/images/hf", model_id)

--- a/models/vision/ddpm/modeling_ddpm.py
+++ b/models/vision/ddpm/modeling_ddpm.py
@@ -14,13 +14,13 @@
 # limitations under the License.
-from diffusers import DiffusionPipeline
-import tqdm
 import torch
+import tqdm
+from diffusers import DiffusionPipeline
 class DDPM(DiffusionPipeline):
    def __init__(self, unet, noise_scheduler):
        super().__init__()
        self.register_modules(unet=unet, noise_scheduler=noise_scheduler)
@@ -31,13 +31,25 @@ class DDPM(DiffusionPipeline):
        self.unet.to(torch_device)
        # 1. Sample gaussian noise
-        image = self.noise_scheduler.sample_noise((batch_size, self.unet.in_channels, self.unet.resolution, self.unet.resolution), device=torch_device, generator=generator)
+        image = self.noise_scheduler.sample_noise(
+            (batch_size, self.unet.in_channels, self.unet.resolution, self.unet.resolution),
+            device=torch_device,
+            generator=generator,
+        )
        for t in tqdm.tqdm(reversed(range(len(self.noise_scheduler))), total=len(self.noise_scheduler)):
            # i) define coefficients for time step t
            clipped_image_coeff = 1 / torch.sqrt(self.noise_scheduler.get_alpha_prod(t))
            clipped_noise_coeff = torch.sqrt(1 / self.noise_scheduler.get_alpha_prod(t) - 1)
-            image_coeff = (1 - self.noise_scheduler.get_alpha_prod(t - 1)) * torch.sqrt(self.noise_scheduler.get_alpha(t)) / (1 - self.noise_scheduler.get_alpha_prod(t))
+            image_coeff = (
-            clipped_coeff = torch.sqrt(self.noise_scheduler.get_alpha_prod(t - 1)) * self.noise_scheduler.get_beta(t) / (1 - self.noise_scheduler.get_alpha_prod(t))
+                (1 - self.noise_scheduler.get_alpha_prod(t - 1))
+                * torch.sqrt(self.noise_scheduler.get_alpha(t))
+                / (1 - self.noise_scheduler.get_alpha_prod(t))
+            )
+            clipped_coeff = (
+                torch.sqrt(self.noise_scheduler.get_alpha_prod(t - 1))
+                * self.noise_scheduler.get_beta(t)
+                / (1 - self.noise_scheduler.get_alpha_prod(t))
+            )
            # ii) predict noise residual
            with torch.no_grad():
@@ -50,7 +62,9 @@ class DDPM(DiffusionPipeline):
            prev_image = clipped_coeff * pred_mean + image_coeff * image
            # iv) sample variance
-            prev_variance = self.noise_scheduler.sample_variance(t, prev_image.shape, device=torch_device, generator=generator)
+            prev_variance = self.noise_scheduler.sample_variance(
+                t, prev_image.shape, device=torch_device, generator=generator
+            )
            # v) sample  x_{t-1} ~ N(prev_image, prev_variance)
            sampled_prev_image = prev_image + prev_variance

--- a/models/vision/glide/convert_weights.py
+++ b/models/vision/glide/convert_weights.py
 import torch
 from torch import nn
-from diffusers import ClassifierFreeGuidanceScheduler, GlideDDIMScheduler, CLIPTextModel, GLIDETextToImageUNetModel, GLIDESuperResUNetModel
+from diffusers import (
+    ClassifierFreeGuidanceScheduler,
+    CLIPTextModel,
+    GlideDDIMScheduler,
+    GLIDESuperResUNetModel,
+    GLIDETextToImageUNetModel,
+)
 from modeling_glide import GLIDE
 from transformers import CLIPTextConfig, GPT2Tokenizer
@@ -22,7 +28,9 @@ config = CLIPTextConfig(
    use_padding_embeddings=True,
 )
 model = CLIPTextModel(config).eval()
-tokenizer = GPT2Tokenizer("./glide-base/tokenizer/vocab.json", "./glide-base/tokenizer/merges.txt", pad_token="<|endoftext|>")
+tokenizer = GPT2Tokenizer(
+    "./glide-base/tokenizer/vocab.json", "./glide-base/tokenizer/merges.txt", pad_token="<|endoftext|>"
+)
 hf_encoder = model.text_model
@@ -97,10 +105,13 @@ superres_model.load_state_dict(ups_state_dict, strict=False)
 upscale_scheduler = GlideDDIMScheduler(timesteps=1000, beta_schedule="linear")
-glide = GLIDE(text_unet=text2im_model, text_noise_scheduler=text_scheduler, text_encoder=model, tokenizer=tokenizer,
+glide = GLIDE(
-              upscale_unet=superres_model, upscale_noise_scheduler=upscale_scheduler)
+    text_unet=text2im_model,
+    text_noise_scheduler=text_scheduler,
+    text_encoder=model,
+    tokenizer=tokenizer,
+    upscale_unet=superres_model,
+    upscale_noise_scheduler=upscale_scheduler,
+)
 glide.save_pretrained("./glide-base")
--- a/models/vision/glide/modeling_glide.py
+++ b/models/vision/glide/modeling_glide.py
@@ -18,7 +18,14 @@ import numpy as np
 import torch
 import tqdm
-from diffusers import ClassifierFreeGuidanceScheduler, GlideDDIMScheduler, CLIPTextModel, DiffusionPipeline, GLIDETextToImageUNetModel, GLIDESuperResUNetModel
+from diffusers import (
+    ClassifierFreeGuidanceScheduler,
+    CLIPTextModel,
+    DiffusionPipeline,
+    GlideDDIMScheduler,
+    GLIDESuperResUNetModel,
+    GLIDETextToImageUNetModel,
+)
 from transformers import GPT2Tokenizer
@@ -46,12 +53,16 @@ class GLIDE(DiffusionPipeline):
        text_encoder: CLIPTextModel,
        tokenizer: GPT2Tokenizer,
        upscale_unet: GLIDESuperResUNetModel,
-        upscale_noise_scheduler: GlideDDIMScheduler
+        upscale_noise_scheduler: GlideDDIMScheduler,
    ):
        super().__init__()
        self.register_modules(
-            text_unet=text_unet, text_noise_scheduler=text_noise_scheduler, text_encoder=text_encoder, tokenizer=tokenizer,
+            text_unet=text_unet,
-            upscale_unet=upscale_unet, upscale_noise_scheduler=upscale_noise_scheduler
+            text_noise_scheduler=text_noise_scheduler,
+            text_encoder=text_encoder,
+            tokenizer=tokenizer,
+            upscale_unet=upscale_unet,
+            upscale_noise_scheduler=upscale_noise_scheduler,
        )
    def q_posterior_mean_variance(self, scheduler, x_start, x_t, t):
@@ -67,9 +78,7 @@ class GLIDE(DiffusionPipeline):
            + _extract_into_tensor(scheduler.posterior_mean_coef2, t, x_t.shape) * x_t
        )
        posterior_variance = _extract_into_tensor(scheduler.posterior_variance, t, x_t.shape)
-        posterior_log_variance_clipped = _extract_into_tensor(
+        posterior_log_variance_clipped = _extract_into_tensor(scheduler.posterior_log_variance_clipped, t, x_t.shape)
-            scheduler.posterior_log_variance_clipped, t, x_t.shape
-        )
        assert (
            posterior_mean.shape[0]
            == posterior_variance.shape[0]
@@ -190,19 +199,30 @@ class GLIDE(DiffusionPipeline):
        # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
        upsample_temp = 0.997
-        image = self.upscale_noise_scheduler.sample_noise(
+        image = (
-            (batch_size, 3, 256, 256), device=torch_device, generator=generator
+            self.upscale_noise_scheduler.sample_noise(
-        ) * upsample_temp
+                (batch_size, 3, 256, 256), device=torch_device, generator=generator
+            )
+            * upsample_temp
+        )
        num_timesteps = len(self.upscale_noise_scheduler)
-        for t in tqdm.tqdm(reversed(range(len(self.upscale_noise_scheduler))), total=len(self.upscale_noise_scheduler)):
+        for t in tqdm.tqdm(
+            reversed(range(len(self.upscale_noise_scheduler))), total=len(self.upscale_noise_scheduler)
+        ):
            # i) define coefficients for time step t
            clipped_image_coeff = 1 / torch.sqrt(self.upscale_noise_scheduler.get_alpha_prod(t))
            clipped_noise_coeff = torch.sqrt(1 / self.upscale_noise_scheduler.get_alpha_prod(t) - 1)
-            image_coeff = (1 - self.upscale_noise_scheduler.get_alpha_prod(t - 1)) * torch.sqrt(
+            image_coeff = (
-                self.upscale_noise_scheduler.get_alpha(t)) / (1 - self.upscale_noise_scheduler.get_alpha_prod(t))
+                (1 - self.upscale_noise_scheduler.get_alpha_prod(t - 1))
-            clipped_coeff = torch.sqrt(self.upscale_noise_scheduler.get_alpha_prod(t - 1)) * self.upscale_noise_scheduler.get_beta(
+                * torch.sqrt(self.upscale_noise_scheduler.get_alpha(t))
-                t) / (1 - self.upscale_noise_scheduler.get_alpha_prod(t))
+                / (1 - self.upscale_noise_scheduler.get_alpha_prod(t))
+            )
+            clipped_coeff = (
+                torch.sqrt(self.upscale_noise_scheduler.get_alpha_prod(t - 1))
+                * self.upscale_noise_scheduler.get_beta(t)
+                / (1 - self.upscale_noise_scheduler.get_alpha_prod(t))
+            )
            # ii) predict noise residual
            time_input = torch.tensor([t] * image.shape[0], device=torch_device)
@@ -216,8 +236,9 @@ class GLIDE(DiffusionPipeline):
            prev_image = clipped_coeff * pred_mean + image_coeff * image
            # iv) sample variance
-            prev_variance = self.upscale_noise_scheduler.sample_variance(t, prev_image.shape, device=torch_device,
+            prev_variance = self.upscale_noise_scheduler.sample_variance(
-                                                                 generator=generator)
+                t, prev_image.shape, device=torch_device, generator=generator
+            )
            # v) sample  x_{t-1} ~ N(prev_image, prev_variance)
            sampled_prev_image = prev_image + prev_variance

--- a/models/vision/glide/run_glide.py
+++ b/models/vision/glide/run_glide.py
 import torch
-from diffusers import DiffusionPipeline
 import PIL.Image
+from diffusers import DiffusionPipeline
 generator = torch.Generator()
 generator = generator.manual_seed(0)
@@ -14,8 +16,8 @@ pipeline = DiffusionPipeline.from_pretrained(model_id)
 img = pipeline("a clip art of a hugging face", generator)
 # process image to PIL
-img = ((img + 1)*127.5).round().clamp(0, 255).to(torch.uint8).cpu().numpy()
+img = ((img + 1) * 127.5).round().clamp(0, 255).to(torch.uint8).cpu().numpy()
 image_pil = PIL.Image.fromarray(img)
 # save image
 image_pil.save("test.png")
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -84,6 +84,7 @@ _deps = [
    "isort>=5.5.4",
    "numpy",
    "pytest",
+    "regex!=2019.12.17",
    "requests",
    "torch>=1.4",
    "torchvision",
@@ -168,6 +169,7 @@ install_requires = [
    deps["filelock"],
    deps["huggingface-hub"],
    deps["numpy"],
+    deps["regex"],
    deps["requests"],
    deps["torch"],
    deps["torchvision"],

--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -7,7 +7,7 @@ __version__ = "0.0.1"
 from .modeling_utils import ModelMixin
 from .models.clip_text_transformer import CLIPTextModel
 from .models.unet import UNetModel
-from .models.unet_glide import GLIDETextToImageUNetModel, GLIDESuperResUNetModel
+from .models.unet_glide import GLIDESuperResUNetModel, GLIDETextToImageUNetModel
 from .models.unet_ldm import UNetLDMModel
 from .models.vqvae import VQModel
 from .pipeline_utils import DiffusionPipeline

--- a/src/diffusers/configuration_utils.py
+++ b/src/diffusers/configuration_utils.py
@@ -23,13 +23,13 @@ import os
 import re
 from typing import Any, Dict, Tuple, Union
-from requests import HTTPError
 from huggingface_hub import hf_hub_download
+from requests import HTTPError
+from . import __version__
 from .utils import (
-    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
    DIFFUSERS_CACHE,
+    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
    EntryNotFoundError,
    RepositoryNotFoundError,
    RevisionNotFoundError,
@@ -37,9 +37,6 @@ from .utils import (
 )
-from . import __version__
 logger = logging.get_logger(__name__)
 _re_configuration_file = re.compile(r"config\.(.*)\.json")
@@ -95,9 +92,7 @@ class ConfigMixin:
    @classmethod
    def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs):
-        config_dict = cls.get_config_dict(
+        config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
-            pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs
-        )
        init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs)
@@ -157,16 +152,16 @@ class ConfigMixin:
            except RepositoryNotFoundError:
                raise EnvironmentError(
-                    f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on "
+                    f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed"
-                    "'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having "
+                    " on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token"
-                    "permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass "
+                    " having permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and"
-                    "`use_auth_token=True`."
+                    " pass `use_auth_token=True`."
                )
            except RevisionNotFoundError:
                raise EnvironmentError(
-                    f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
+                    f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for"
-                    f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for "
+                    " this model name. Check the model page at"
-                    "available revisions."
+                    f" 'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
                )
            except EntryNotFoundError:
                raise EnvironmentError(
@@ -174,14 +169,16 @@ class ConfigMixin:
                )
            except HTTPError as err:
                raise EnvironmentError(
-                    f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n{err}"
+                    "There was a specific connection error when trying to load"
+                    f" {pretrained_model_name_or_path}:\n{err}"
                )
            except ValueError:
                raise EnvironmentError(
-                    f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in"
+                    f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
-                    f" the cached files and it looks like {pretrained_model_name_or_path} is not the path to a directory"
+                    f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
-                    f" containing a {cls.config_name} file.\nCheckout your internet connection or see how to run the"
+                    f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to"
-                    " library in offline mode at 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
+                    " run the library in offline mode at"
+                    " 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
                )
            except EnvironmentError:
                raise EnvironmentError(
@@ -195,9 +192,7 @@ class ConfigMixin:
            # Load config dict
            config_dict = cls._dict_from_json_file(config_file)
        except (json.JSONDecodeError, UnicodeDecodeError):
-            raise EnvironmentError(
+            raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
-                f"It looks like the config file at '{config_file}' is not a valid JSON file."
-            )
        return config_dict

--- a/src/diffusers/dependency_versions_table.py
+++ b/src/diffusers/dependency_versions_table.py
@@ -3,29 +3,15 @@
 # 2. run `make deps_table_update``
 deps = {
    "Pillow": "Pillow",
-    "accelerate": "accelerate>=0.9.0",
    "black": "black~=22.0,>=22.3",
-    "codecarbon": "codecarbon==1.2.0",
+    "filelock": "filelock",
-    "dataclasses": "dataclasses",
+    "flake8": "flake8>=3.8.3",
-    "datasets": "datasets",
+    "huggingface-hub": "huggingface-hub",
-    "GitPython": "GitPython<3.1.19",
-    "hf-doc-builder": "hf-doc-builder>=0.3.0",
-    "huggingface-hub": "huggingface-hub>=0.1.0,<1.0",
-    "importlib_metadata": "importlib_metadata",
    "isort": "isort>=5.5.4",
-    "numpy": "numpy>=1.17",
+    "numpy": "numpy",
    "pytest": "pytest",
-    "pytest-timeout": "pytest-timeout",
-    "pytest-xdist": "pytest-xdist",
-    "python": "python>=3.7.0",
    "regex": "regex!=2019.12.17",
    "requests": "requests",
-    "sagemaker": "sagemaker>=2.31.0",
-    "tokenizers": "tokenizers>=0.11.1,!=0.11.3,<0.13",
    "torch": "torch>=1.4",
-    "torchaudio": "torchaudio",
+    "torchvision": "torchvision",
-    "tqdm": "tqdm>=4.27",
-    "unidic": "unidic>=1.0.2",
-    "unidic_lite": "unidic_lite>=1.0.7",
-    "uvicorn": "uvicorn",
 }
--- a/src/diffusers/dynamic_modules_utils.py
+++ b/src/diffusers/dynamic_modules_utils.py
@@ -23,7 +23,8 @@ from pathlib import Path
 from typing import Dict, Optional, Union
 from huggingface_hub import cached_download
-from .utils import HF_MODULES_CACHE, DIFFUSERS_DYNAMIC_MODULE_NAME, logging
+from .utils import DIFFUSERS_DYNAMIC_MODULE_NAME, HF_MODULES_CACHE, logging
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

--- a/src/diffusers/modeling_utils.py
+++ b/src/diffusers/modeling_utils.py
@@ -20,8 +20,8 @@ from typing import Callable, List, Optional, Tuple, Union
 import torch
 from torch import Tensor, device
-from requests import HTTPError
 from huggingface_hub import hf_hub_download
+from requests import HTTPError
 from .utils import (
    CONFIG_NAME,
@@ -379,10 +379,13 @@ class ModelMixin(torch.nn.Module):
                    f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
                )
            except EntryNotFoundError:
-                raise EnvironmentError(f"{pretrained_model_name_or_path} does not appear to have a file named {model_file}.")
+                raise EnvironmentError(
+                    f"{pretrained_model_name_or_path} does not appear to have a file named {model_file}."
+                )
            except HTTPError as err:
                raise EnvironmentError(
-                    f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n{err}"
+                    "There was a specific connection error when trying to load"
+                    f" {pretrained_model_name_or_path}:\n{err}"
                )
            except ValueError:
                raise EnvironmentError(

--- a/src/diffusers/models/__init__.py
+++ b/src/diffusers/models/__init__.py
@@ -18,6 +18,6 @@
 from .clip_text_transformer import CLIPTextModel
 from .unet import UNetModel
-from .unet_glide import GLIDETextToImageUNetModel, GLIDESuperResUNetModel
+from .unet_glide import GLIDESuperResUNetModel, GLIDETextToImageUNetModel
 from .unet_ldm import UNetLDMModel
 from .vqvae import VQModel
\ No newline at end of file
--- a/src/diffusers/models/unet.py
+++ b/src/diffusers/models/unet.py
@@ -25,8 +25,8 @@ from torch.cuda.amp import GradScaler, autocast
 from torch.optim import Adam
 from torch.utils import data
-from torchvision import transforms, utils
 from PIL import Image
+from torchvision import transforms, utils
 from tqdm import tqdm
 from ..configuration_utils import ConfigMixin
@@ -335,19 +335,22 @@ class UNetModel(ModelMixin, ConfigMixin):
 # dataset classes
 class Dataset(data.Dataset):
-    def __init__(self, folder, image_size, exts=['jpg', 'jpeg', 'png']):
+    def __init__(self, folder, image_size, exts=["jpg", "jpeg", "png"]):
        super().__init__()
        self.folder = folder
        self.image_size = image_size
-        self.paths = [p for ext in exts for p in Path(f'{folder}').glob(f'**/*.{ext}')]
+        self.paths = [p for ext in exts for p in Path(f"{folder}").glob(f"**/*.{ext}")]
-        self.transform = transforms.Compose([
+        self.transform = transforms.Compose(
-            transforms.Resize(image_size),
+            [
-            transforms.RandomHorizontalFlip(),
+                transforms.Resize(image_size),
-            transforms.CenterCrop(image_size),
+                transforms.RandomHorizontalFlip(),
-            transforms.ToTensor()
+                transforms.CenterCrop(image_size),
-        ])
+                transforms.ToTensor(),
+            ]
+        )
    def __len__(self):
        return len(self.paths)
@@ -359,7 +362,7 @@ class Dataset(data.Dataset):
 # trainer class
-class EMA():
+class EMA:
    def __init__(self, beta):
        super().__init__()
        self.beta = beta

--- a/src/diffusers/models/unet_glide.py
+++ b/src/diffusers/models/unet_glide.py
@@ -647,24 +647,24 @@ class GLIDETextToImageUNetModel(GLIDEUNetModel):
    """
    def __init__(
-             self,
+        self,
-             in_channels=3,
+        in_channels=3,
-             model_channels=192,
+        model_channels=192,
-             out_channels=6,
+        out_channels=6,
-             num_res_blocks=3,
+        num_res_blocks=3,
-             attention_resolutions=(2, 4, 8),
+        attention_resolutions=(2, 4, 8),
-             dropout=0,
+        dropout=0,
-             channel_mult=(1, 2, 4, 8),
+        channel_mult=(1, 2, 4, 8),
-             conv_resample=True,
+        conv_resample=True,
-             dims=2,
+        dims=2,
-             use_checkpoint=False,
+        use_checkpoint=False,
-             use_fp16=False,
+        use_fp16=False,
-             num_heads=1,
+        num_heads=1,
-             num_head_channels=-1,
+        num_head_channels=-1,
-             num_heads_upsample=-1,
+        num_heads_upsample=-1,
-             use_scale_shift_norm=False,
+        use_scale_shift_norm=False,
-             resblock_updown=False,
+        resblock_updown=False,
-             transformer_dim=512
+        transformer_dim=512,
    ):
        super().__init__(
            in_channels=in_channels,
@@ -683,7 +683,7 @@ class GLIDETextToImageUNetModel(GLIDEUNetModel):
            num_heads_upsample=num_heads_upsample,
            use_scale_shift_norm=use_scale_shift_norm,
            resblock_updown=resblock_updown,
-            transformer_dim=transformer_dim
+            transformer_dim=transformer_dim,
        )
        self.register(
            in_channels=in_channels,
@@ -702,7 +702,7 @@ class GLIDETextToImageUNetModel(GLIDEUNetModel):
            num_heads_upsample=num_heads_upsample,
            use_scale_shift_norm=use_scale_shift_norm,
            resblock_updown=resblock_updown,
-            transformer_dim=transformer_dim
+            transformer_dim=transformer_dim,
        )
        self.transformer_proj = nn.Linear(transformer_dim, self.model_channels * 4)
@@ -737,23 +737,23 @@ class GLIDESuperResUNetModel(GLIDEUNetModel):
    """
    def __init__(
-            self,
+        self,
-            in_channels=3,
+        in_channels=3,
-            model_channels=192,
+        model_channels=192,
-            out_channels=6,
+        out_channels=6,
-            num_res_blocks=3,
+        num_res_blocks=3,
-            attention_resolutions=(2, 4, 8),
+        attention_resolutions=(2, 4, 8),
-            dropout=0,
+        dropout=0,
-            channel_mult=(1, 2, 4, 8),
+        channel_mult=(1, 2, 4, 8),
-            conv_resample=True,
+        conv_resample=True,
-            dims=2,
+        dims=2,
-            use_checkpoint=False,
+        use_checkpoint=False,
-            use_fp16=False,
+        use_fp16=False,
-            num_heads=1,
+        num_heads=1,
-            num_head_channels=-1,
+        num_head_channels=-1,
-            num_heads_upsample=-1,
+        num_heads_upsample=-1,
-            use_scale_shift_norm=False,
+        use_scale_shift_norm=False,
-            resblock_updown=False,
+        resblock_updown=False,
    ):
        super().__init__(
            in_channels=in_channels,
@@ -809,4 +809,4 @@ class GLIDESuperResUNetModel(GLIDEUNetModel):
            h = torch.cat([h, hs.pop()], dim=1)
            h = module(h, emb)
        return self.out(h)
\ No newline at end of file
--- a/src/diffusers/models/unet_ldm.py
+++ b/src/diffusers/models/unet_ldm.py
--- a/src/diffusers/pipeline_utils.py
+++ b/src/diffusers/pipeline_utils.py
@@ -20,10 +20,9 @@ from typing import Optional, Union
 from huggingface_hub import snapshot_download
-from .utils import logging, DIFFUSERS_CACHE
 from .configuration_utils import ConfigMixin
 from .dynamic_modules_utils import get_class_from_dynamic_module
+from .utils import DIFFUSERS_CACHE, logging
 INDEX_FILE = "diffusion_model.pt"
@@ -106,7 +105,7 @@ class DiffusionPipeline(ConfigMixin):
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
        r"""
-            Add docstrings
+        Add docstrings
        """
        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
        resume_download = kwargs.pop("resume_download", False)

--- a/src/diffusers/schedulers/gaussian_ddpm.py
+++ b/src/diffusers/schedulers/gaussian_ddpm.py
@@ -11,12 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import torch
 import math
+import torch
 from torch import nn
 from ..configuration_utils import ConfigMixin
-from .schedulers_utils import linear_beta_schedule, betas_for_alpha_bar
+from .schedulers_utils import betas_for_alpha_bar, linear_beta_schedule
 SAMPLING_CONFIG_NAME = "scheduler_config.json"