Commit d1715d33 authored by anton-l's avatar anton-l
Browse files

Merge branch 'main' of github.com:huggingface/diffusers

parents db2a1077 3e801673
...@@ -7,8 +7,8 @@ ...@@ -7,8 +7,8 @@
![model_diff_1_50](https://user-images.githubusercontent.com/23423619/171610307-dab0cd8b-75da-4d4e-9f5a-5922072e2bb5.png) ![model_diff_1_50](https://user-images.githubusercontent.com/23423619/171610307-dab0cd8b-75da-4d4e-9f5a-5922072e2bb5.png)
**Samplers**: Algorithm to *train* and *sample* from **Model**. Defines alpha and beta schedule, timesteps, etc.. **Schedulers**: Algorithm to sample noise schedule for both *training* and *inference*. Defines alpha and beta schedule, timesteps, etc..
*Example: Vanilla DDPM, DDIM, PLMS, DEIN* *Example: Gaussian DDPM, DDIM, PLMS, DEIN*
![sampling](https://user-images.githubusercontent.com/23423619/171608981-3ad05953-a684-4c82-89f8-62a459147a07.png) ![sampling](https://user-images.githubusercontent.com/23423619/171608981-3ad05953-a684-4c82-89f8-62a459147a07.png)
![training](https://user-images.githubusercontent.com/23423619/171608964-b3260cce-e6b4-4841-959d-7d8ba4b8d1b2.png) ![training](https://user-images.githubusercontent.com/23423619/171608964-b3260cce-e6b4-4841-959d-7d8ba4b8d1b2.png)
...@@ -20,9 +20,9 @@ ...@@ -20,9 +20,9 @@
## 1. `diffusers` as a central modular diffusion and sampler library ## 1. `diffusers` as a central modular diffusion and sampler library
`diffusers` should be more modularized than `transformers` so that parts of it can be easily used in other libraries. `diffusers` is more modularized than `transformers`. The idea is that researchers and engineers can use only parts of the library easily for their own use cases.
It could become a central place for all kinds of models, schedulers, training utils and processors required when using diffusion models in audio, vision, ... It could become a central place for all kinds of models, schedulers, training utils and processors that one can mix and match for one's own use case.
One should be able to save both models and samplers as well as load them from the Hub. Both models and schedulers should be load- and saveable from the Hub.
Example: Example:
...@@ -78,8 +78,8 @@ image_pil = PIL.Image.fromarray(image_processed[0]) ...@@ -78,8 +78,8 @@ image_pil = PIL.Image.fromarray(image_processed[0])
image_pil.save("test.png") image_pil.save("test.png")
``` ```
## 2. `diffusers` as a collection of most import Diffusion models (GLIDE, Dalle, ...) ## 2. `diffusers` as a collection of most important Diffusion systems (GLIDE, Dalle, ...)
`models` directory in repository hosts complete diffusion training code & pipelines. Easily load & saveable from the Hub. Will be possible to use just from pip `diffusers` version: `models` directory in repository hosts the complete code necessary for running a diffusion system as well as for training it. A `DiffusionPipeline` class allows one to easily run the diffusion model in inference:
Example: Example:
...@@ -113,38 +113,48 @@ image_pil.save("test.png") ...@@ -113,38 +113,48 @@ image_pil.save("test.png")
│   │   ├── modeling_fastdiff.py │   │   ├── modeling_fastdiff.py
│   │   ├── README.md │   │   ├── README.md
│   │   └── run_fastdiff.py │   │   └── run_fastdiff.py
│   ├── __init__.py
│   └── vision │   └── vision
│   ├── dalle2 │   ├── dalle2
│   │   ├── modeling_dalle2.py │   │   ├── modeling_dalle2.py
│   │   ├── README.md │   │   ├── README.md
│   │   └── run_dalle2.py │   │   └── run_dalle2.py
│   ├── ddpm │   ├── ddpm
│   │   ├── example.py
│   │   ├── modeling_ddpm.py │   │   ├── modeling_ddpm.py
│   │   ├── README.md │   │   ├── README.md
│   │   └── run_ddpm.py │   │   └── run_ddpm.py
│   ├── glide │   ├── glide
│   │   ├── modeling_glide.py │   │   ├── modeling_glide.py
│   │   ├── modeling_vqvae.py
│   │   ├── README.md │   │   ├── README.md
│   │   └── run_dalle2.py │   │   └── run_glide.py
│   ├── imagen │   ├── imagen
│   │   ├── modeling_dalle2.py │   │   ├── modeling_dalle2.py
│   │   ├── README.md │   │   ├── README.md
│   │   └── run_dalle2.py │   │   └── run_dalle2.py
│   ├── __init__.py
│   └── latent_diffusion │   └── latent_diffusion
│   ├── modeling_latent_diffusion.py │   ├── modeling_latent_diffusion.py
│   ├── README.md │   ├── README.md
│   └── run_latent_diffusion.py │   └── run_latent_diffusion.py
├── pyproject.toml
├── README.md
├── setup.cfg
├── setup.py
├── src ├── src
│   └── diffusers │   └── diffusers
│   ├── configuration_utils.py │   ├── configuration_utils.py
│   ├── __init__.py │   ├── __init__.py
│   ├── modeling_utils.py │   ├── modeling_utils.py
│   ├── models │   ├── models
│   │   ├── __init__.py
│   │   ├── unet_glide.py
│   │   └── unet.py │   │   └── unet.py
│   ├── processors │   ├── pipeline_utils.py
│   └── schedulers │   └── schedulers
│   ├── gaussian_ddpm.py │   ├── gaussian_ddpm.py
│   ├── __init__.py
├── tests ├── tests
│   └── test_modeling_utils.py │   └── test_modeling_utils.py
``` ```
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import random import random
import tempfile import tempfile
import unittest import unittest
...@@ -28,6 +29,7 @@ from models.vision.ddpm.modeling_ddpm import DDPM ...@@ -28,6 +29,7 @@ from models.vision.ddpm.modeling_ddpm import DDPM
global_rng = random.Random() global_rng = random.Random()
torch_device = "cuda" if torch.cuda.is_available() else "cpu" torch_device = "cuda" if torch.cuda.is_available() else "cpu"
torch.backends.cuda.matmul.allow_tf32 = False
def parse_flag_from_env(key, default=False): def parse_flag_from_env(key, default=False):
...@@ -113,8 +115,7 @@ class SamplerTesterMixin(unittest.TestCase): ...@@ -113,8 +115,7 @@ class SamplerTesterMixin(unittest.TestCase):
@slow @slow
def test_sample(self): def test_sample(self):
generator = torch.Generator() generator = torch.manual_seed(0)
generator = generator.manual_seed(6694729458485568)
# 1. Load models # 1. Load models
scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church") scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church")
...@@ -159,18 +160,17 @@ class SamplerTesterMixin(unittest.TestCase): ...@@ -159,18 +160,17 @@ class SamplerTesterMixin(unittest.TestCase):
assert image.shape == (1, 3, 256, 256) assert image.shape == (1, 3, 256, 256)
image_slice = image[0, -1, -3:, -3:].cpu() image_slice = image[0, -1, -3:, -3:].cpu()
assert (image_slice - torch.tensor([[-0.0598, -0.0611, -0.0506], [-0.0726, 0.0220, 0.0103], [-0.0723, -0.1310, -0.2458]])).abs().sum() < 1e-3 expected_slice = torch.tensor([-0.1636, -0.1765, -0.1968, -0.1338, -0.1432, -0.1622, -0.1793, -0.2001, -0.2280])
assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2
def test_sample_fast(self): def test_sample_fast(self):
# 1. Load models # 1. Load models
generator = torch.Generator() generator = torch.manual_seed(0)
generator = generator.manual_seed(6694729458485568)
scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church", timesteps=10) scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church", timesteps=10)
model = UNetModel.from_pretrained("fusing/ddpm-lsun-church").to(torch_device) model = UNetModel.from_pretrained("fusing/ddpm-lsun-church").to(torch_device)
# 2. Sample gaussian noise # 2. Sample gaussian noise
torch.manual_seed(0)
image = scheduler.sample_noise((1, model.in_channels, model.resolution, model.resolution), device=torch_device, generator=generator) image = scheduler.sample_noise((1, model.in_channels, model.resolution, model.resolution), device=torch_device, generator=generator)
# 3. Denoise # 3. Denoise
...@@ -200,7 +200,8 @@ class SamplerTesterMixin(unittest.TestCase): ...@@ -200,7 +200,8 @@ class SamplerTesterMixin(unittest.TestCase):
assert image.shape == (1, 3, 256, 256) assert image.shape == (1, 3, 256, 256)
image_slice = image[0, -1, -3:, -3:].cpu() image_slice = image[0, -1, -3:, -3:].cpu()
assert (image_slice - torch.tensor([[0.1746, 0.5125, -0.7920], [-0.5734, -0.2910, -0.1984], [0.4090, -0.7740, -0.3941]])).abs().sum() < 1e-3 expected_slice = torch.tensor([-0.0304, -0.1895, -0.2436, -0.9837, -0.5422, 0.1931, -0.8175, 0.0862, -0.7783])
assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2
class PipelineTesterMixin(unittest.TestCase): class PipelineTesterMixin(unittest.TestCase):
...@@ -214,16 +215,14 @@ class PipelineTesterMixin(unittest.TestCase): ...@@ -214,16 +215,14 @@ class PipelineTesterMixin(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname: with tempfile.TemporaryDirectory() as tmpdirname:
ddpm.save_pretrained(tmpdirname) ddpm.save_pretrained(tmpdirname)
new_ddpm = DDPM.from_pretrained(tmpdirname) new_ddpm = DDPM.from_pretrained(tmpdirname)
generator = torch.Generator() generator = torch.manual_seed(0)
generator = generator.manual_seed(669472945848556)
image = ddpm(generator=generator) image = ddpm(generator=generator)
generator = generator.manual_seed(669472945848556) generator = generator.manual_seed(0)
new_image = new_ddpm(generator=generator) new_image = new_ddpm(generator=generator)
assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass" assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass"
@slow @slow
def test_from_pretrained_hub(self): def test_from_pretrained_hub(self):
...@@ -235,12 +234,10 @@ class PipelineTesterMixin(unittest.TestCase): ...@@ -235,12 +234,10 @@ class PipelineTesterMixin(unittest.TestCase):
ddpm.noise_scheduler.num_timesteps = 10 ddpm.noise_scheduler.num_timesteps = 10
ddpm_from_hub.noise_scheduler.num_timesteps = 10 ddpm_from_hub.noise_scheduler.num_timesteps = 10
generator = torch.manual_seed(0)
generator = torch.Generator(device=torch_device)
generator = generator.manual_seed(669472945848556)
image = ddpm(generator=generator) image = ddpm(generator=generator)
generator = generator.manual_seed(669472945848556) generator = generator.manual_seed(0)
new_image = ddpm_from_hub(generator=generator) new_image = ddpm_from_hub(generator=generator)
assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass" assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment