Commit d1715d33 authored by anton-l

Merge branch 'main' of github.com:huggingface/diffusers

parents db2a1077 3e801673
@@ -7,8 +7,8 @@
![model_diff_1_50](https://user-images.githubusercontent.com/23423619/171610307-dab0cd8b-75da-4d4e-9f5a-5922072e2bb5.png)
-**Samplers**: Algorithm to *train* and *sample from* the **Model**. Defines the alpha and beta schedule, timesteps, etc.
-*Example: Vanilla DDPM, DDIM, PLMS, DEIN*
+**Schedulers**: Algorithm that provides the noise schedule for both *training* and *inference*. Defines the alpha and beta schedule, timesteps, etc.
+*Example: Gaussian DDPM, DDIM, PLMS, DEIN*
![sampling](https://user-images.githubusercontent.com/23423619/171608981-3ad05953-a684-4c82-89f8-62a459147a07.png)
![training](https://user-images.githubusercontent.com/23423619/171608964-b3260cce-e6b4-4841-959d-7d8ba4b8d1b2.png)
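To make the model/scheduler split concrete, here is a minimal sketch of what a scheduler encapsulates. The class name and the linear beta schedule are illustrative assumptions, not the library's API:

```python
import torch

class MinimalDDPMScheduler:
    """Sketch: the scheduler owns the noise schedule, not the network."""

    def __init__(self, timesteps=1000, beta_start=1e-4, beta_end=0.02):
        self.timesteps = timesteps
        # Linear beta schedule; the alpha terms follow directly from it.
        self.betas = torch.linspace(beta_start, beta_end, timesteps)
        self.alphas = 1.0 - self.betas
        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)

    def add_noise(self, sample, noise, t):
        # Forward process q(x_t | x_0), used at training time.
        a = self.alphas_cumprod[t] ** 0.5
        s = (1.0 - self.alphas_cumprod[t]) ** 0.5
        return a * sample + s * noise
```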
@@ -20,9 +20,9 @@
## 1. `diffusers` as a central modular diffusion and sampler library
-`diffusers` should be more modularized than `transformers` so that parts of it can easily be used in other libraries.
-It could become a central place for all kinds of models, schedulers, training utils and processors required when using diffusion models in audio, vision, ...
-One should be able to save both models and samplers as well as load them from the Hub.
+`diffusers` is more modularized than `transformers`. The idea is that researchers and engineers can easily use only the parts of the library they need for their own use cases.
+It could become a central place for all kinds of models, schedulers, training utils and processors that one can mix and match for one's own use case.
+Both models and schedulers should be loadable and saveable from the Hub.
Example:
@@ -78,8 +78,8 @@ image_pil = PIL.Image.fromarray(image_processed[0])
image_pil.save("test.png")
```
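A condensed sketch of the load-and-save round trip described above: the class names mirror the tests at the end of this diff (`GaussianDDPMScheduler`, `UNetModel`), while the import path and exact signatures are assumptions at this stage.

```python
import torch

# Assumed import path; only the class names are confirmed by the tests below.
from diffusers import GaussianDDPMScheduler, UNetModel

scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church")
model = UNetModel.from_pretrained("fusing/ddpm-lsun-church")

# Sample the starting noise with the scheduler, as the tests below do.
generator = torch.manual_seed(0)
noise = scheduler.sample_noise(
    (1, model.in_channels, model.resolution, model.resolution),
    device="cpu",
    generator=generator,
)

# Saving back works the same way in reverse.
model.save_pretrained("./my-ddpm")
```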
-## 2. `diffusers` as a collection of the most important Diffusion models (GLIDE, Dalle, ...)
-The `models` directory in the repository hosts complete diffusion training code & pipelines, easily loadable & saveable from the Hub. It will be possible to use them directly from the pip `diffusers` version:
+## 2. `diffusers` as a collection of the most important Diffusion systems (GLIDE, Dalle, ...)
+The `models` directory in the repository hosts the complete code necessary to run a diffusion system as well as to train it. A `DiffusionPipeline` class makes it easy to run the diffusion model in inference:
Example:
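A rough sketch of such a pipeline call, based on the `DDPM` class and the `fusing/ddpm-cifar10` checkpoint exercised in the tests below; everything beyond those names is an assumption:

```python
import torch

# The tests below import DDPM from models.vision.ddpm.modeling_ddpm;
# treat this as a sketch, not the final DiffusionPipeline API.
from models.vision.ddpm.modeling_ddpm import DDPM

ddpm = DDPM.from_pretrained("fusing/ddpm-cifar10")
generator = torch.manual_seed(0)
image = ddpm(generator=generator)  # runs the full denoising loop in inference
```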
@@ -113,38 +113,48 @@ image_pil.save("test.png")
│   │       ├── modeling_fastdiff.py
│   │       ├── README.md
│   │       └── run_fastdiff.py
│   ├── __init__.py
│   └── vision
│       ├── dalle2
│       │   ├── modeling_dalle2.py
│       │   ├── README.md
│       │   └── run_dalle2.py
│       ├── ddpm
│       │   ├── example.py
│       │   ├── modeling_ddpm.py
│       │   ├── README.md
│       │   └── run_ddpm.py
│       ├── glide
│       │   ├── modeling_glide.py
│       │   ├── modeling_vqvae.py
│       │   ├── README.md
-│       │   └── run_dalle2.py
+│       │   └── run_glide.py
│       ├── imagen
│       │   ├── modeling_dalle2.py
│       │   ├── README.md
│       │   └── run_dalle2.py
│       ├── __init__.py
│       └── latent_diffusion
│           ├── modeling_latent_diffusion.py
│           ├── README.md
│           └── run_latent_diffusion.py
├── pyproject.toml
├── README.md
├── setup.cfg
├── setup.py
├── src
│   └── diffusers
│       ├── configuration_utils.py
│       ├── __init__.py
│       ├── modeling_utils.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── unet_glide.py
│       │   └── unet.py
-│       ├── processors
+│       ├── pipeline_utils.py
│       └── schedulers
│           ├── gaussian_ddpm.py
│           ├── __init__.py
├── tests
│   └── test_modeling_utils.py
```
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import random
+import tempfile
import unittest
@@ -28,6 +29,7 @@ from models.vision.ddpm.modeling_ddpm import DDPM
global_rng = random.Random()
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
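+# Disable TF32 matmuls so GPU outputs stay within the tight tolerances asserted below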
+torch.backends.cuda.matmul.allow_tf32 = False
def parse_flag_from_env(key, default=False):
@@ -113,8 +115,7 @@ class SamplerTesterMixin(unittest.TestCase):
    @slow
    def test_sample(self):
-        generator = torch.Generator()
-        generator = generator.manual_seed(6694729458485568)
+        generator = torch.manual_seed(0)

        # 1. Load models
        scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church")
@@ -159,18 +160,17 @@ class SamplerTesterMixin(unittest.TestCase):
        assert image.shape == (1, 3, 256, 256)
        image_slice = image[0, -1, -3:, -3:].cpu()
-        assert (image_slice - torch.tensor([[-0.0598, -0.0611, -0.0506], [-0.0726, 0.0220, 0.0103], [-0.0723, -0.1310, -0.2458]])).abs().sum() < 1e-3
+        expected_slice = torch.tensor([-0.1636, -0.1765, -0.1968, -0.1338, -0.1432, -0.1622, -0.1793, -0.2001, -0.2280])
+        assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2

    def test_sample_fast(self):
        # 1. Load models
-        generator = torch.Generator()
-        generator = generator.manual_seed(6694729458485568)
+        generator = torch.manual_seed(0)
        scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church", timesteps=10)
        model = UNetModel.from_pretrained("fusing/ddpm-lsun-church").to(torch_device)

        # 2. Sample gaussian noise
-        torch.manual_seed(0)
        image = scheduler.sample_noise((1, model.in_channels, model.resolution, model.resolution), device=torch_device, generator=generator)

        # 3. Denoise
@@ -200,7 +200,8 @@ class SamplerTesterMixin(unittest.TestCase):
        assert image.shape == (1, 3, 256, 256)
        image_slice = image[0, -1, -3:, -3:].cpu()
-        assert (image_slice - torch.tensor([[0.1746, 0.5125, -0.7920], [-0.5734, -0.2910, -0.1984], [0.4090, -0.7740, -0.3941]])).abs().sum() < 1e-3
+        expected_slice = torch.tensor([-0.0304, -0.1895, -0.2436, -0.9837, -0.5422, 0.1931, -0.8175, 0.0862, -0.7783])
+        assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2
class PipelineTesterMixin(unittest.TestCase):
@@ -215,16 +216,14 @@ class PipelineTesterMixin(unittest.TestCase):
            ddpm.save_pretrained(tmpdirname)
            new_ddpm = DDPM.from_pretrained(tmpdirname)

-        generator = torch.Generator()
-        generator = generator.manual_seed(669472945848556)
+        generator = torch.manual_seed(0)
        image = ddpm(generator=generator)

-        generator = generator.manual_seed(669472945848556)
+        generator = generator.manual_seed(0)
        new_image = new_ddpm(generator=generator)

        assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass"

    @slow
    def test_from_pretrained_hub(self):
        model_path = "fusing/ddpm-cifar10"
@@ -235,12 +234,10 @@ class PipelineTesterMixin(unittest.TestCase):
        ddpm.noise_scheduler.num_timesteps = 10
        ddpm_from_hub.noise_scheduler.num_timesteps = 10

-        generator = torch.Generator(device=torch_device)
-        generator = generator.manual_seed(669472945848556)
+        generator = torch.manual_seed(0)
        image = ddpm(generator=generator)

-        generator = generator.manual_seed(669472945848556)
+        generator = generator.manual_seed(0)
        new_image = ddpm_from_hub(generator=generator)

        assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass"