removed get alpha / get beta

2b8bc91c · Patrick von Platen · 5b8ce1e7 · 2b8bc91c · 2b8bc91c · 2b8bc91c
Commit 2b8bc91c authored Jun 20, 2022 by Patrick von Platen
20 changed files
--- a/1
+++ b/1
+# coding=utf-8
+# Copyright 2022 The HuggingFace Inc. team.
+# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Configuration base class and utilities."""
+import inspect
+import json
+import os
+import re
+from collections import OrderedDict
+from typing import Any, Dict, Tuple, Union
+from huggingface_hub import hf_hub_download
+from requests import HTTPError
+from . import __version__
+from .utils import (
+    DIFFUSERS_CACHE,
+    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
+    EntryNotFoundError,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
+    logging,
+)
+logger = logging.get_logger(__name__)
+_re_configuration_file = re.compile(r"config\.(.*)\.json")
+class ConfigMixin:
+    r"""
+    Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as
+    methods for loading/downloading/saving configurations.
+    """
+    config_name = None
+    def register_to_config(self, **kwargs):
+        """
+        Record `kwargs` as configuration values of this instance.
+        Each entry, together with the bookkeeping keys `_class_name` and `_diffusers_version`, is set as an attribute
+        on `self` and merged into `_internal_dict`, which is re-created as a `FrozenDict`.
+        Raises:
+            NotImplementedError: if the class does not define `config_name`.
+            AttributeError: re-raised (after logging) when an attribute cannot be set on `self`.
+        """
+        if self.config_name is None:
+            raise NotImplementedError(f"Make sure that {self.__class__} has defined a class name `config_name`")
+        kwargs.update(_class_name=self.__class__.__name__, _diffusers_version=__version__)
+        for key in kwargs:
+            value = kwargs[key]
+            try:
+                setattr(self, key, value)
+            except AttributeError:
+                logger.error(f"Can't set {key} with value {value} for {self}")
+                raise
+        if hasattr(self, "_internal_dict"):
+            # Later registrations win over values that were registered before.
+            previous_dict = dict(self._internal_dict)
+            internal_dict = {**self._internal_dict, **kwargs}
+            logger.debug(f"Updating config from {previous_dict} to {internal_dict}")
+        else:
+            internal_dict = kwargs
+        self._internal_dict = FrozenDict(internal_dict)
+    def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):
+        """
+        Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the
+        [`~ConfigMixin.from_config`] class method.
+        Args:
+            save_directory (`str` or `os.PathLike`):
+                Directory where the configuration JSON file will be saved (will be created if it does not exist).
+            push_to_hub (`bool`, *optional*, defaults to `False`):
+                Accepted for interface compatibility; this method does not use it.
+            kwargs:
+                Accepted for interface compatibility; this method does not use them.
+        Raises:
+            AssertionError: if `save_directory` points to an existing file.
+        """
+        if os.path.isfile(save_directory):
+            raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
+        os.makedirs(save_directory, exist_ok=True)
+        # If we save using the predefined names, we can load using `from_config`
+        output_config_file = os.path.join(save_directory, self.config_name)
+        self.to_json_file(output_config_file)
+        logger.info(f"Configuration saved in {output_config_file}")
+    @classmethod
+    def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs):
+        """
+        Instantiate `cls` from the configuration found at `pretrained_model_name_or_path`.
+        The configuration is loaded with `get_config_dict`, reduced to the `__init__` arguments with
+        `extract_init_dict` and passed to the constructor.
+        Returns:
+            The new instance, or `(instance, unused_kwargs)` when `return_unused_kwargs` is truthy.
+        """
+        loaded = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
+        init_dict, unused_kwargs = cls.extract_init_dict(loaded, **kwargs)
+        model = cls(**init_dict)
+        return (model, unused_kwargs) if return_unused_kwargs else model
+    @classmethod
+    def get_config_dict(
+        cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Locate the configuration file for `pretrained_model_name_or_path` and load it as a dictionary.
+        The argument may be a path to a JSON file, a directory containing `cls.config_name`, or any other string,
+        which is handed to `hf_hub_download` as a repository id.
+        Args:
+            pretrained_model_name_or_path (`str` or `os.PathLike`):
+                File path, directory path or repository id.
+            kwargs:
+                `cache_dir`, `force_download`, `resume_download`, `proxies`, `use_auth_token`, `local_files_only` and
+                `revision` are forwarded to `hf_hub_download`; any other key is ignored here.
+        Returns:
+            `Dict[str, Any]`: the parsed content of the configuration file.
+        Raises:
+            ValueError: if `cls.config_name` is not defined.
+            EnvironmentError: if the file cannot be found, downloaded or parsed as JSON.
+        """
+        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
+        force_download = kwargs.pop("force_download", False)
+        resume_download = kwargs.pop("resume_download", False)
+        proxies = kwargs.pop("proxies", None)
+        use_auth_token = kwargs.pop("use_auth_token", None)
+        local_files_only = kwargs.pop("local_files_only", False)
+        revision = kwargs.pop("revision", None)
+        user_agent = {"file_type": "config"}
+        pretrained_model_name_or_path = str(pretrained_model_name_or_path)
+        if cls.config_name is None:
+            raise ValueError(
+                "`self.config_name` is not defined. Note that one should not load a config from "
+                "`ConfigMixin`. Please make sure to define `config_name` in a class inheriting from `ConfigMixin`"
+            )
+        if os.path.isfile(pretrained_model_name_or_path):
+            config_file = pretrained_model_name_or_path
+        elif os.path.isdir(pretrained_model_name_or_path):
+            if os.path.isfile(os.path.join(pretrained_model_name_or_path, cls.config_name)):
+                # Load the config file stored in the local directory
+                config_file = os.path.join(pretrained_model_name_or_path, cls.config_name)
+            else:
+                raise EnvironmentError(
+                    f"Error no file named {cls.config_name} found in directory {pretrained_model_name_or_path}."
+                )
+        else:
+            try:
+                # Load from URL or cache if already cached
+                config_file = hf_hub_download(
+                    pretrained_model_name_or_path,
+                    filename=cls.config_name,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    local_files_only=local_files_only,
+                    use_auth_token=use_auth_token,
+                    user_agent=user_agent,
+                    # Without this the requested revision is ignored and the default branch is downloaded.
+                    revision=revision,
+                )
+            except RepositoryNotFoundError:
+                raise EnvironmentError(
+                    f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed"
+                    " on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token"
+                    " having permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and"
+                    " pass `use_auth_token=True`."
+                )
+            except RevisionNotFoundError:
+                raise EnvironmentError(
+                    f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for"
+                    " this model name. Check the model page at"
+                    f" 'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
+                )
+            except EntryNotFoundError:
+                raise EnvironmentError(
+                    f"{pretrained_model_name_or_path} does not appear to have a file named {cls.config_name}."
+                )
+            except HTTPError as err:
+                raise EnvironmentError(
+                    "There was a specific connection error when trying to load"
+                    f" {pretrained_model_name_or_path}:\n{err}"
+                )
+            except ValueError:
+                raise EnvironmentError(
+                    f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
+                    f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
+                    f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to"
+                    " run the library in offline mode at"
+                    " 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
+                )
+            except EnvironmentError:
+                raise EnvironmentError(
+                    f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
+                    "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
+                    f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
+                    f"containing a {cls.config_name} file"
+                )
+        try:
+            # Load config dict
+            config_dict = cls._dict_from_json_file(config_file)
+        except (json.JSONDecodeError, UnicodeDecodeError):
+            raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
+        return config_dict
+    @classmethod
+    def extract_init_dict(cls, config_dict, **kwargs):
+        """
+        Split configuration values into the arguments accepted by `cls.__init__` and the remaining values.
+        Args:
+            config_dict (`Dict[str, Any]`):
+                Values loaded from a configuration file. The dictionary passed in is not modified.
+            kwargs:
+                Overrides; a keyword argument takes precedence over the same key in `config_dict`.
+        Returns:
+            `Tuple[Dict[str, Any], Dict[str, Any]]`: `(init_dict, unused_kwargs)`. `init_dict` holds the values for
+            the parameters of `cls.__init__`; `unused_kwargs` holds every other entry of `config_dict` and `kwargs`.
+        """
+        # Work on a copy so the caller's dictionary is left untouched.
+        config_dict = dict(config_dict)
+        expected_keys = set(inspect.signature(cls.__init__).parameters)
+        expected_keys.discard("self")
+        init_dict = {}
+        for key in expected_keys:
+            if key in kwargs:
+                # overwrite key, and drop the overridden config value so it is not reported as unused
+                init_dict[key] = kwargs.pop(key)
+                config_dict.pop(key, None)
+            elif key in config_dict:
+                # use value from config dict
+                init_dict[key] = config_dict.pop(key)
+        # `dict.update` returns `None`: merge first, then return the merged dictionary itself.
+        config_dict.update(kwargs)
+        unused_kwargs = config_dict
+        passed_keys = set(init_dict.keys())
+        if len(expected_keys - passed_keys) > 0:
+            logger.warning(
+                f"{expected_keys - passed_keys} was not found in config. Values will be initialized to default values."
+            )
+        return init_dict, unused_kwargs
+    @classmethod
+    def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]):
+        """Read the UTF-8 encoded file `json_file` and return its parsed JSON content."""
+        with open(json_file, "r", encoding="utf-8") as handle:
+            return json.load(handle)
+    def __repr__(self):
+        """Return the class name followed by the JSON serialization produced by `to_json_string`."""
+        class_name = self.__class__.__name__
+        serialized = self.to_json_string()
+        return f"{class_name} {serialized}"
+    @property
+    def config(self) -> Dict[str, Any]:
+        """Return `_internal_dict`, the dictionary assembled by `register_to_config`."""
+        return self._internal_dict
+    def to_json_string(self) -> str:
+        """
+        Serializes this instance to a JSON string.
+        Returns:
+            `str`: String containing all the attributes that make up this configuration instance in JSON format
+            (keys sorted, two-space indentation, trailing newline). An instance on which nothing has been registered
+            yet serializes to an empty JSON object.
+        """
+        # `_internal_dict` only exists once `register_to_config` has been called.
+        config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {}
+        return json.dumps(config_dict, indent=2, sort_keys=True) + "\n"
+    def to_json_file(self, json_file_path: Union[str, os.PathLike]):
+        """
+        Write the JSON serialization of this instance to a file.
+        Args:
+            json_file_path (`str` or `os.PathLike`):
+                Destination path; the file is opened for writing with UTF-8 encoding.
+        """
+        with open(json_file_path, "w", encoding="utf-8") as handle:
+            payload = self.to_json_string()
+            handle.write(payload)
+class FrozenDict(OrderedDict):
+    """
+    Ordered dictionary that rejects modification once it has been constructed.
+    Every item is also set as an instance attribute during construction. Afterwards item assignment, attribute
+    assignment, `del d[key]`, `setdefault`, `pop` and `update` all raise `Exception`.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for key, value in self.items():
+            setattr(self, key, value)
+        # From here on `__setattr__` and `__setitem__` refuse any further change.
+        self.__frozen = True
+    def __reduce__(self):
+        # Rebuild copies and unpickled instances through `__init__`: the inherited protocol re-inserts the items one
+        # by one with `__setitem__`, which an already frozen instance rejects.
+        return (self.__class__, (list(self.items()),))
+    def __delitem__(self, *args, **kwargs):
+        raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.")
+    def setdefault(self, *args, **kwargs):
+        raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.")
+    def pop(self, *args, **kwargs):
+        raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.")
+    def update(self, *args, **kwargs):
+        raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.")
+    def __setattr__(self, name, value):
+        # `self.__frozen` is stored under its name-mangled form, so the flag has to be looked up under that name;
+        # `hasattr(self, "__frozen")` never matches it.
+        if getattr(self, "_FrozenDict__frozen", False):
+            raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.")
+        super().__setattr__(name, value)
+    def __setitem__(self, name, value):
+        if getattr(self, "_FrozenDict__frozen", False):
+            raise Exception(f"You cannot use ``__setitem__`` on a {self.__class__.__name__} instance.")
+        super().__setitem__(name, value)
--- a/README.md
+++ b/README.md
@@ -159,7 +159,8 @@ eta = 0.0  # <- deterministic sampling
 for t in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps):
    # 1. predict noise residual
-    orig_t = noise_scheduler.get_orig_t(t, num_inference_steps)
+    orig_t = len(noise_scheduler) // num_inference_steps * t
    with torch.inference_mode():
        residual = unet(image, orig_t)

--- a/src/diffusers/configuration_utils.py
+++ b/src/diffusers/configuration_utils.py
@@ -241,7 +241,7 @@ class ConfigMixin:
        Returns:
            `str`: String containing all the attributes that make up this configuration instance in JSON format.
        """
-        config_dict = self._internal_dict
+        config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {}
        return json.dumps(config_dict, indent=2, sort_keys=True) + "\n"
    def to_json_file(self, json_file_path: Union[str, os.PathLike]):

--- a/src/diffusers/pipelines/old/ddim/README.md
+++ b/src/diffusers/pipelines/old/ddim/README.md
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-->
-# Denoising Diffusion Implicit Models (DDIM)
-## Overview
-DDPM was proposed in [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) by *Jiaming Song, Chenlin Meng, Stefano Ermon*
-The abstract from the paper is the following:
-*Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space.*
-Tips:
- ...
- ...
-This model was contributed by [???](https://huggingface.co/???). The original code can be found [here](https://github.com/hojonathanho/diffusion).
--- a/src/diffusers/pipelines/old/ddim/__init__.py
+++ b/src/diffusers/pipelines/old/ddim/__init__.py
-from .pipeline_ddim import DDIM
--- a/src/diffusers/pipelines/old/ddim/example.py
+++ b/src/diffusers/pipelines/old/ddim/example.py
-#!/usr/bin/env python3
-import os
-import pathlib
-import numpy as np
-import PIL.Image
-from modeling_ddim import DDIM
-model_ids = ["ddim-celeba-hq", "ddim-lsun-church", "ddim-lsun-bedroom"]
-for model_id in model_ids:
-    path = os.path.join("/home/patrick/images/hf", model_id)
-    pathlib.Path(path).mkdir(parents=True, exist_ok=True)
-    ddpm = DDIM.from_pretrained("fusing/" + model_id)
-    image = ddpm(batch_size=4)
-    image_processed = image.cpu().permute(0, 2, 3, 1)
-    image_processed = (image_processed + 1.0) * 127.5
-    image_processed = image_processed.numpy().astype(np.uint8)
-    for i in range(image_processed.shape[0]):
-        image_pil = PIL.Image.fromarray(image_processed[i])
-        image_pil.save(os.path.join(path, f"image_{i}.png"))
--- a/src/diffusers/pipelines/old/ddim/run_ddpm.py
+++ b/src/diffusers/pipelines/old/ddim/run_ddpm.py
-#!/usr/bin/env python3
-import torch
-from diffusers import DDPMScheduler, UNetModel
-model = UNetModel(dim=64, dim_mults=(1, 2, 4, 8))
-diffusion = DDPMScheduler(model, image_size=128, timesteps=1000, loss_type="l1")  # number of steps  # L1 or L2
-training_images = torch.randn(8, 3, 128, 128)  # your images need to be normalized from a range of -1 to +1
-loss = diffusion(training_images)
-loss.backward()
-# after a lot of training
-sampled_images = diffusion.sample(batch_size=4)
-sampled_images.shape  # (4, 3, 128, 128)
--- a/src/diffusers/pipelines/old/ddim/run_inference.py
+++ b/src/diffusers/pipelines/old/ddim/run_inference.py
-#!/usr/bin/env python3
-# !pip install diffusers
-import numpy as np
-import PIL.Image
-from modeling_ddim import DDIM
-model_id = "fusing/ddpm-cifar10"
-model_id = "fusing/ddpm-lsun-bedroom"
-# load model and scheduler
-ddpm = DDIM.from_pretrained(model_id)
-# run pipeline in inference (sample random noise and denoise)
-image = ddpm()
-# process image to PIL
-image_processed = image.cpu().permute(0, 2, 3, 1)
-image_processed = (image_processed + 1.0) * 127.5
-image_processed = image_processed.numpy().astype(np.uint8)
-image_pil = PIL.Image.fromarray(image_processed[0])
-# save image
-image_pil.save("/home/patrick/images/show.png")
--- a/src/diffusers/pipelines/old/ddpm/README.md
+++ b/src/diffusers/pipelines/old/ddpm/README.md
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-->
-# Denoising Diffusion Probabilistic Models (DDPM)
-## Overview
-DDPM was proposed in [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) by *Jonathan Ho, Ajay Jain, Pieter Abbeel*.
-The abstract from the paper is the following:
-*We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN. Our implementation is available at this https URL*
-Tips:
- ...
- ...
-This model was contributed by [???](https://huggingface.co/???). The original code can be found [here](https://github.com/hojonathanho/diffusion).
-![ddpm](https://user-images.githubusercontent.com/23423619/171627620-e3406711-1e20-4a99-8e30-ec5a86a465be.png)
--- a/src/diffusers/pipelines/old/ddpm/example.py
+++ b/src/diffusers/pipelines/old/ddpm/example.py
-#!/usr/bin/env python3
-import os
-import pathlib
-import numpy as np
-import PIL.Image
-from modeling_ddpm import DDPM
-model_ids = [
-    "ddpm-lsun-cat",
-    "ddpm-lsun-cat-ema",
-    "ddpm-lsun-church-ema",
-    "ddpm-lsun-church",
-    "ddpm-lsun-bedroom",
-    "ddpm-lsun-bedroom-ema",
-    "ddpm-cifar10-ema",
-    "ddpm-cifar10",
-    "ddpm-celeba-hq",
-    "ddpm-celeba-hq-ema",
-]
-for model_id in model_ids:
-    path = os.path.join("/home/patrick/images/hf", model_id)
-    pathlib.Path(path).mkdir(parents=True, exist_ok=True)
-    ddpm = DDPM.from_pretrained("fusing/" + model_id)
-    image = ddpm(batch_size=4)
-    image_processed = image.cpu().permute(0, 2, 3, 1)
-    image_processed = (image_processed + 1.0) * 127.5
-    image_processed = image_processed.numpy().astype(np.uint8)
-    for i in range(image_processed.shape[0]):
-        image_pil = PIL.Image.fromarray(image_processed[i])
-        image_pil.save(os.path.join(path, f"image_{i}.png"))
--- a/src/diffusers/pipelines/old/ddpm/run_ddpm.py
+++ b/src/diffusers/pipelines/old/ddpm/run_ddpm.py
-#!/usr/bin/env python3
-import torch
-from diffusers import DDPMScheduler, UNetModel
-model = UNetModel(dim=64, dim_mults=(1, 2, 4, 8))
-diffusion = DDPMScheduler(model, image_size=128, timesteps=1000, loss_type="l1")  # number of steps  # L1 or L2
-training_images = torch.randn(8, 3, 128, 128)  # your images need to be normalized from a range of -1 to +1
-loss = diffusion(training_images)
-loss.backward()
-# after a lot of training
-sampled_images = diffusion.sample(batch_size=4)
-sampled_images.shape  # (4, 3, 128, 128)
--- a/src/diffusers/pipelines/old/glide/README.md
+++ b/src/diffusers/pipelines/old/glide/README.md
-# References
-[GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models](https://arxiv.org/pdf/2112.10741.pdf)
-[Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/pdf/2105.05233.pdf)
\ No newline at end of file
--- a/src/diffusers/pipelines/old/glide/convert_weights.py
+++ b/src/diffusers/pipelines/old/glide/convert_weights.py
-import torch
-from torch import nn
-from diffusers import ClassifierFreeGuidanceScheduler, GLIDESuperResUNetModel, GLIDETextToImageUNetModel
-from modeling_glide import GLIDE, CLIPTextModel
-from transformers import CLIPTextConfig, GPT2Tokenizer
-# wget https://openaipublic.blob.core.windows.net/diffusion/dec-2021/base.pt
-state_dict = torch.load("base.pt", map_location="cpu")
-state_dict = {k: nn.Parameter(v) for k, v in state_dict.items()}
-### Convert the text encoder
-config = CLIPTextConfig(
-    vocab_size=50257,
-    max_position_embeddings=128,
-    hidden_size=512,
-    intermediate_size=2048,
-    num_hidden_layers=16,
-    num_attention_heads=8,
-    use_padding_embeddings=True,
-)
-model = CLIPTextModel(config).eval()
-tokenizer = GPT2Tokenizer(
-    "./glide-base/tokenizer/vocab.json", "./glide-base/tokenizer/merges.txt", pad_token="<|endoftext|>"
-)
-hf_encoder = model.text_model
-hf_encoder.embeddings.token_embedding.weight = state_dict["token_embedding.weight"]
-hf_encoder.embeddings.position_embedding.weight.data = state_dict["positional_embedding"]
-hf_encoder.embeddings.padding_embedding.weight.data = state_dict["padding_embedding"]
-hf_encoder.final_layer_norm.weight = state_dict["final_ln.weight"]
-hf_encoder.final_layer_norm.bias = state_dict["final_ln.bias"]
-for layer_idx in range(config.num_hidden_layers):
-    hf_layer = hf_encoder.encoder.layers[layer_idx]
-    hf_layer.self_attn.qkv_proj.weight = state_dict[f"transformer.resblocks.{layer_idx}.attn.c_qkv.weight"]
-    hf_layer.self_attn.qkv_proj.bias = state_dict[f"transformer.resblocks.{layer_idx}.attn.c_qkv.bias"]
-    hf_layer.self_attn.out_proj.weight = state_dict[f"transformer.resblocks.{layer_idx}.attn.c_proj.weight"]
-    hf_layer.self_attn.out_proj.bias = state_dict[f"transformer.resblocks.{layer_idx}.attn.c_proj.bias"]
-    hf_layer.layer_norm1.weight = state_dict[f"transformer.resblocks.{layer_idx}.ln_1.weight"]
-    hf_layer.layer_norm1.bias = state_dict[f"transformer.resblocks.{layer_idx}.ln_1.bias"]
-    hf_layer.layer_norm2.weight = state_dict[f"transformer.resblocks.{layer_idx}.ln_2.weight"]
-    hf_layer.layer_norm2.bias = state_dict[f"transformer.resblocks.{layer_idx}.ln_2.bias"]
-    hf_layer.mlp.fc1.weight = state_dict[f"transformer.resblocks.{layer_idx}.mlp.c_fc.weight"]
-    hf_layer.mlp.fc1.bias = state_dict[f"transformer.resblocks.{layer_idx}.mlp.c_fc.bias"]
-    hf_layer.mlp.fc2.weight = state_dict[f"transformer.resblocks.{layer_idx}.mlp.c_proj.weight"]
-    hf_layer.mlp.fc2.bias = state_dict[f"transformer.resblocks.{layer_idx}.mlp.c_proj.bias"]
-### Convert the Text-to-Image UNet
-text2im_model = GLIDETextToImageUNetModel(
-    in_channels=3,
-    model_channels=192,
-    out_channels=6,
-    num_res_blocks=3,
-    attention_resolutions=(2, 4, 8),
-    dropout=0.1,
-    channel_mult=(1, 2, 3, 4),
-    num_heads=1,
-    num_head_channels=64,
-    num_heads_upsample=1,
-    use_scale_shift_norm=True,
-    resblock_updown=True,
-    transformer_dim=512,
-)
-text2im_model.load_state_dict(state_dict, strict=False)
-text_scheduler = ClassifierFreeGuidanceScheduler(timesteps=1000, beta_schedule="squaredcos_cap_v2")
-### Convert the Super-Resolution UNet
-# wget https://openaipublic.blob.core.windows.net/diffusion/dec-2021/upsample.pt
-ups_state_dict = torch.load("upsample.pt", map_location="cpu")
-superres_model = GLIDESuperResUNetModel(
-    in_channels=6,
-    model_channels=192,
-    out_channels=6,
-    num_res_blocks=2,
-    attention_resolutions=(8, 16, 32),
-    dropout=0.1,
-    channel_mult=(1, 1, 2, 2, 4, 4),
-    num_heads=1,
-    num_head_channels=64,
-    num_heads_upsample=1,
-    use_scale_shift_norm=True,
-    resblock_updown=True,
-)
-superres_model.load_state_dict(ups_state_dict, strict=False)
-upscale_scheduler = DDIMScheduler(timesteps=1000, beta_schedule="linear")
-glide = GLIDE(
-    text_unet=text2im_model,
-    text_noise_scheduler=text_scheduler,
-    text_encoder=model,
-    tokenizer=tokenizer,
-    upscale_unet=superres_model,
-    upscale_noise_scheduler=upscale_scheduler,
-)
-glide.save_pretrained("./glide-base")
--- a/src/diffusers/pipelines/old/glide/modeling_glide.py
+++ b/src/diffusers/pipelines/old/glide/modeling_glide.py
--- a/src/diffusers/pipelines/old/glide/run_glide.py
+++ b/src/diffusers/pipelines/old/glide/run_glide.py
-import torch
-import PIL.Image
-from diffusers import DiffusionPipeline
-generator = torch.Generator()
-generator = generator.manual_seed(0)
-model_id = "fusing/glide-base"
-# load model and scheduler
-pipeline = DiffusionPipeline.from_pretrained(model_id)
-# run inference (text-conditioned denoising + upscaling)
-img = pipeline("a crayon drawing of a corgi", generator)
-# process image to PIL
-img = img.squeeze(0)
-img = ((img + 1) * 127.5).round().clamp(0, 255).to(torch.uint8).cpu().numpy()
-image_pil = PIL.Image.fromarray(img)
-# save image
-image_pil.save("test.png")
--- a/src/diffusers/pipelines/old/latent_diffusion/README.md
+++ b/src/diffusers/pipelines/old/latent_diffusion/README.md
--- a/src/diffusers/pipelines/old/latent_diffusion/configuration_ldmbert.py
+++ b/src/diffusers/pipelines/old/latent_diffusion/configuration_ldmbert.py
-# coding=utf-8
-# Copyright 2022 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" LDMBERT model configuration"""
-from transformers.configuration_utils import PretrainedConfig
-from transformers.utils import logging
-logger = logging.get_logger(__name__)
-LDMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "ldm-bert": "https://huggingface.co/ldm-bert/resolve/main/config.json",
-}
-class LDMBertConfig(PretrainedConfig):
-    r"""
-    This is the configuration class to store the configuration of a [`LDMBertModel`]. It is used to instantiate a
-    LDMBERT model according to the specified arguments, defining the model architecture. Instantiating a configuration
-    with the defaults will yield a similar configuration to that of the LDMBERT
-    [facebook/ldmbert-large](https://huggingface.co/facebook/ldmbert-large) architecture.
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
-    documentation from [`PretrainedConfig`] for more information.
-    Args:
-        vocab_size (`int`, *optional*, defaults to 50265):
-            Vocabulary size of the LDMBERT model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`LDMBertModel`] or [`TFLDMBertModel`].
-        d_model (`int`, *optional*, defaults to 1024):
-            Dimensionality of the layers and the pooler layer.
-        encoder_layers (`int`, *optional*, defaults to 12):
-            Number of encoder layers.
-        decoder_layers (`int`, *optional*, defaults to 12):
-            Number of decoder layers.
-        encoder_attention_heads (`int`, *optional*, defaults to 16):
-            Number of attention heads for each attention layer in the Transformer encoder.
-        decoder_attention_heads (`int`, *optional*, defaults to 16):
-            Number of attention heads for each attention layer in the Transformer decoder.
-        decoder_ffn_dim (`int`, *optional*, defaults to 4096):
-            Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
-        encoder_ffn_dim (`int`, *optional*, defaults to 4096):
-            Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
-        activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
-            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
-            `"relu"`, `"silu"` and `"gelu_new"` are supported.
-        dropout (`float`, *optional*, defaults to 0.1):
-            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
-        attention_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for the attention probabilities.
-        activation_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
-        max_position_embeddings (`int`, *optional*, defaults to 1024):
-            The maximum sequence length that this model might ever be used with. Typically set this to something large
-            just in case (e.g., 512 or 1024 or 2048).
-        init_std (`float`, *optional*, defaults to 0.02):
-            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-        encoder_layerdrop: (`float`, *optional*, defaults to 0.0):
-            The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
-            for more details.
-        decoder_layerdrop: (`float`, *optional*, defaults to 0.0):
-            The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
-            for more details.
-        scale_embedding (`bool`, *optional*, defaults to `False`):
-            Scale embeddings by diving by sqrt(d_model).
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models).
-        num_labels (`int`, *optional*, defaults to 3):
-            The number of labels to use in [`LDMBertForSequenceClassification`].
-        forced_eos_token_id (`int`, *optional*, defaults to 2):
-            The id of the token to force as the last generated token when `max_length` is reached. Usually set to
-            `eos_token_id`.
-    Example:
-    ```python
-    >>> from transformers import LDMBertModel, LDMBertConfig
-    >>> # Initializing a LDMBERT facebook/ldmbert-large style configuration
-    >>> configuration = LDMBertConfig()
-    >>> # Initializing a model from the facebook/ldmbert-large style configuration
-    >>> model = LDMBertModel(configuration)
-    >>> # Accessing the model configuration
-    >>> configuration = model.config
-    ```"""
-    model_type = "ldmbert"
-    keys_to_ignore_at_inference = ["past_key_values"]
-    attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
-    def __init__(
-        self,
-        vocab_size=30522,
-        max_position_embeddings=77,
-        encoder_layers=32,
-        encoder_ffn_dim=5120,
-        encoder_attention_heads=8,
-        head_dim=64,
-        encoder_layerdrop=0.0,
-        activation_function="gelu",
-        d_model=1280,
-        dropout=0.1,
-        attention_dropout=0.0,
-        activation_dropout=0.0,
-        init_std=0.02,
-        classifier_dropout=0.0,
-        scale_embedding=False,
-        use_cache=True,
-        pad_token_id=0,
-        **kwargs,
-    ):
-        """Record the model hyper-parameters on the configuration instance.
-        Each named argument of the signature (except `pad_token_id`) is stored as an attribute of the same name,
-        and `encoder_layers` is additionally stored as `num_hidden_layers`. `pad_token_id` and any extra
-        `kwargs` are handed to the parent initializer, which runs last.
-        """
-        # vocabulary and sequence length
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        # encoder geometry
-        self.d_model = d_model
-        self.head_dim = head_dim
-        self.encoder_attention_heads = encoder_attention_heads
-        self.encoder_ffn_dim = encoder_ffn_dim
-        self.encoder_layers = encoder_layers
-        self.num_hidden_layers = encoder_layers  # same value, second attribute name
-        # activation and embedding scaling
-        self.activation_function = activation_function
-        self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
-        # regularisation
-        self.dropout = dropout
-        self.attention_dropout = attention_dropout
-        self.activation_dropout = activation_dropout
-        self.classifier_dropout = classifier_dropout
-        self.encoder_layerdrop = encoder_layerdrop
-        # initialisation and caching
-        self.init_std = init_std
-        self.use_cache = use_cache
-        super().__init__(pad_token_id=pad_token_id, **kwargs)
--- a/src/diffusers/pipelines/old/latent_diffusion/modeling_latent_diffusion.py
+++ b/src/diffusers/pipelines/old/latent_diffusion/modeling_latent_diffusion.py
-import torch
-import tqdm
-from diffusers import DiffusionPipeline
-from .configuration_ldmbert import LDMBertConfig  # NOQA
-from .modeling_ldmbert import LDMBertModel  # NOQA
-# add these relative imports here, so we can load from hub
-from .modeling_vae import AutoencoderKL  # NOQA
-class LatentDiffusion(DiffusionPipeline):
-    """Text-conditioned latent diffusion sampling pipeline.
-    The prompt is tokenized and encoded by `bert`, a noise sample is iteratively denoised by `unet` using
-    `noise_scheduler`, and the result is rescaled and decoded by `vqvae`.
-    """
-    def __init__(self, vqvae, bert, tokenizer, unet, noise_scheduler):
-        """Register the five sub-modules on the pipeline via `register_modules`."""
-        super().__init__()
-        self.register_modules(vqvae=vqvae, bert=bert, tokenizer=tokenizer, unet=unet, noise_scheduler=noise_scheduler)
-    @torch.no_grad()
-    def __call__(
-        self,
-        prompt,
-        batch_size=1,
-        generator=None,
-        torch_device=None,
-        eta=0.0,
-        guidance_scale=1.0,
-        num_inference_steps=50,
-    ):
-        """Sample images for `prompt`.
-        Args:
-            prompt: Text passed to the tokenizer and then encoded by `bert`.
-                NOTE(review): the number of prompts presumably has to equal `batch_size` - confirm with callers.
-            batch_size: Leading dimension of the initial noise sample.
-            generator: Forwarded to `noise_scheduler.sample_noise`.
-            torch_device: Device the unet, vqvae and bert are moved to. When `None`, `"cuda"` is used if
-                available, otherwise `"cpu"`.
-            eta: Corresponds to η in the DDIM paper and should be in [0, 1]. Extra noise is only added to each
-                step when `eta > 0`.
-            guidance_scale: Classifier-free guidance weight. A value of exactly 1.0 disables guidance.
-            num_inference_steps: Number of denoising iterations.
-        Returns:
-            The output of `vqvae.decode`, mapped through `(x + 1) / 2` and clamped to [0, 1].
-        """
-        # eta corresponds to η in paper and should be between [0, 1]
-        if torch_device is None:
-            torch_device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.unet.to(torch_device)
-        self.vqvae.to(torch_device)
-        self.bert.to(torch_device)
-        use_guidance = guidance_scale != 1.0
-        # get unconditional embeddings for classifier free guidance
-        if use_guidance:
-            # one empty prompt per generated sample, so that the stacked context below has the same batch
-            # size as the doubled image batch fed to the unet
-            uncond_input = self.tokenizer(
-                [""] * batch_size, padding="max_length", max_length=77, return_tensors="pt"
-            ).to(torch_device)
-            uncond_embeddings = self.bert(uncond_input.input_ids)[0]
-        # get text embedding
-        text_input = self.tokenizer(prompt, padding="max_length", max_length=77, return_tensors="pt").to(torch_device)
-        text_embedding = self.bert(text_input.input_ids)[0]
-        num_trained_timesteps = self.noise_scheduler.config.timesteps
-        inference_step_times = range(0, num_trained_timesteps, num_trained_timesteps // num_inference_steps)
-        image = self.noise_scheduler.sample_noise(
-            (batch_size, self.unet.in_channels, self.unet.image_size, self.unet.image_size),
-            device=torch_device,
-            generator=generator,
-        )
-        # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
-        # Ideally, read the DDIM paper for an in-detail understanding
-        # Notation (<variable name> -> <name in paper>)
-        # - pred_noise_t -> e_theta(x_t, t)
-        # - pred_original_image -> f_theta(x_t, t) or x_0
-        # - std_dev_t -> sigma_t
-        # - eta -> η
-        # - pred_image_direction -> "direction pointing to x_t"
-        # - pred_prev_image -> "x_t-1"
-        for t in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps):
-            if use_guidance:
-                # classifier free guidance needs an unconditional and a conditional prediction;
-                # both are concatenated into a single batch to avoid doing two forward passes
-                image_in = torch.cat([image] * 2)
-                context = torch.cat([uncond_embeddings, text_embedding])
-            else:
-                # guidance_scale of 1 means no guidance
-                image_in = image
-                context = text_embedding
-            # one timestep entry per sample actually fed to the unet (twice the image batch under guidance)
-            timesteps = torch.tensor([inference_step_times[t]] * image_in.shape[0], device=torch_device)
-            # 1. predict noise residual
-            pred_noise_t = self.unet(image_in, timesteps, context=context)
-            # perform guidance
-            if use_guidance:
-                pred_noise_t_uncond, pred_noise_t = pred_noise_t.chunk(2)
-                pred_noise_t = pred_noise_t_uncond + guidance_scale * (pred_noise_t - pred_noise_t_uncond)
-            # 2. predict previous mean of image x_t-1
-            pred_prev_image = self.noise_scheduler.step(pred_noise_t, image, t, num_inference_steps, eta)
-            # 3. optionally sample variance
-            variance = 0
-            if eta > 0:
-                noise = self.noise_scheduler.sample_noise(image.shape, device=image.device, generator=generator)
-                variance = self.noise_scheduler.get_variance(t, num_inference_steps).sqrt() * eta * noise
-            # 4. set current image to prev_image: x_t -> x_t-1
-            image = pred_prev_image + variance
-        # scale and decode image with vae
-        image = 1 / 0.18215 * image
-        image = self.vqvae.decode(image)
-        image = torch.clamp((image + 1.0) / 2.0, min=0.0, max=1.0)
-        return image
--- a/src/diffusers/pipelines/old/latent_diffusion/modeling_ldmbert.py
+++ b/src/diffusers/pipelines/old/latent_diffusion/modeling_ldmbert.py
--- a/src/diffusers/pipelines/old/latent_diffusion/modeling_vae.py
+++ b/src/diffusers/pipelines/old/latent_diffusion/modeling_vae.py