Commit d1715d33 authored by anton-l

Merge branch 'main' of github.com:huggingface/diffusers

parents db2a1077 3e801673
@@ -7,8 +7,8 @@
![model_diff_1_50](https://user-images.githubusercontent.com/23423619/171610307-dab0cd8b-75da-4d4e-9f5a-5922072e2bb5.png)
-**Samplers**: Algorithm to *train* and *sample from* the **Model**. Defines the alpha and beta schedule, timesteps, etc.
-*Example: Vanilla DDPM, DDIM, PLMS, DEIN*
+**Schedulers**: Algorithm that provides the noise schedule for both *training* and *inference*. Defines the alpha and beta schedule, timesteps, etc.
+*Example: Gaussian DDPM, DDIM, PLMS, DEIN*
![sampling](https://user-images.githubusercontent.com/23423619/171608981-3ad05953-a684-4c82-89f8-62a459147a07.png)
![training](https://user-images.githubusercontent.com/23423619/171608964-b3260cce-e6b4-4841-959d-7d8ba4b8d1b2.png)
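To make the model/scheduler split concrete, here is a minimal sketch of what a scheduler encapsulates. The class name and the linear beta schedule are illustrative assumptions, not the library's API:

```python
import torch

class MinimalDDPMScheduler:
    """Sketch: the scheduler owns the noise schedule, not the network."""

    def __init__(self, timesteps=1000, beta_start=1e-4, beta_end=0.02):
        self.timesteps = timesteps
        # Linear beta schedule; the alpha terms follow directly from it.
        self.betas = torch.linspace(beta_start, beta_end, timesteps)
        self.alphas = 1.0 - self.betas
        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)

    def add_noise(self, sample, noise, t):
        # Forward process q(x_t | x_0), used at training time.
        a = self.alphas_cumprod[t] ** 0.5
        s = (1.0 - self.alphas_cumprod[t]) ** 0.5
        return a * sample + s * noise
```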
@@ -20,9 +20,9 @@
## 1. `diffusers` as a central modular diffusion and sampler library
-`diffusers` should be more modularized than `transformers` so that parts of it can easily be used in other libraries.
-It could become a central place for all kinds of models, schedulers, training utils and processors required when using diffusion models in audio, vision, ...
-One should be able to save both models and samplers as well as load them from the Hub.
+`diffusers` is more modularized than `transformers`. The idea is that researchers and engineers can easily use only the parts of the library they need for their own use cases.
+It could become a central place for all kinds of models, schedulers, training utils and processors that one can mix and match for one's own use case.
+Both models and schedulers should be loadable and saveable from the Hub.
Example:
@@ -78,8 +78,8 @@ image_pil = PIL.Image.fromarray(image_processed[0])
image_pil.save("test.png")
```
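A condensed sketch of the load-and-save round trip described above: the class names mirror the tests at the end of this diff (`GaussianDDPMScheduler`, `UNetModel`), while the import path and exact signatures are assumptions at this stage.

```python
import torch

# Assumed import path; only the class names are confirmed by the tests below.
from diffusers import GaussianDDPMScheduler, UNetModel

scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church")
model = UNetModel.from_pretrained("fusing/ddpm-lsun-church")

# Sample the starting noise with the scheduler, as the tests below do.
generator = torch.manual_seed(0)
noise = scheduler.sample_noise(
    (1, model.in_channels, model.resolution, model.resolution),
    device="cpu",
    generator=generator,
)

# Saving back works the same way in reverse.
model.save_pretrained("./my-ddpm")
```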
-## 2. `diffusers` as a collection of the most important Diffusion models (GLIDE, Dalle, ...)
-The `models` directory in the repository hosts complete diffusion training code & pipelines, easily loadable & saveable from the Hub. It will be possible to use them directly from the pip `diffusers` version:
+## 2. `diffusers` as a collection of the most important Diffusion systems (GLIDE, Dalle, ...)
+The `models` directory in the repository hosts the complete code necessary to run a diffusion system as well as to train it. A `DiffusionPipeline` class makes it easy to run the diffusion model in inference:
Example:
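A rough sketch of such a pipeline call, based on the `DDPM` class and the `fusing/ddpm-cifar10` checkpoint exercised in the tests below; everything beyond those names is an assumption:

```python
import torch

# The tests below import DDPM from models.vision.ddpm.modeling_ddpm;
# treat this as a sketch, not the final DiffusionPipeline API.
from models.vision.ddpm.modeling_ddpm import DDPM

ddpm = DDPM.from_pretrained("fusing/ddpm-cifar10")
generator = torch.manual_seed(0)
image = ddpm(generator=generator)  # runs the full denoising loop in inference
```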
@@ -113,38 +113,48 @@ image_pil.save("test.png")
│   │       ├── modeling_fastdiff.py
│   │       ├── README.md
│   │       └── run_fastdiff.py
│   ├── __init__.py
│   └── vision
│       ├── dalle2
│       │   ├── modeling_dalle2.py
│       │   ├── README.md
│       │   └── run_dalle2.py
│       ├── ddpm
│       │   ├── example.py
│       │   ├── modeling_ddpm.py
│       │   ├── README.md
│       │   └── run_ddpm.py
│       ├── glide
│       │   ├── modeling_glide.py
│       │   ├── modeling_vqvae.py
│       │   ├── README.md
-│       │   └── run_dalle2.py
+│       │   └── run_glide.py
│       ├── imagen
│       │   ├── modeling_dalle2.py
│       │   ├── README.md
│       │   └── run_dalle2.py
│       ├── __init__.py
│       └── latent_diffusion
│           ├── modeling_latent_diffusion.py
│           ├── README.md
│           └── run_latent_diffusion.py
├── pyproject.toml
├── README.md
├── setup.cfg
├── setup.py
├── src
│   └── diffusers
│       ├── configuration_utils.py
│       ├── __init__.py
│       ├── modeling_utils.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── unet_glide.py
│       │   └── unet.py
-│       ├── processors
+│       ├── pipeline_utils.py
│       └── schedulers
│           ├── gaussian_ddpm.py
│           ├── __init__.py
├── tests
│   └── test_modeling_utils.py
```
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import random
+import tempfile
import unittest
@@ -28,6 +29,7 @@ from models.vision.ddpm.modeling_ddpm import DDPM
global_rng = random.Random()
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
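+# Disable TF32 matmuls so GPU outputs stay within the tight tolerances asserted below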
+torch.backends.cuda.matmul.allow_tf32 = False
def parse_flag_from_env(key, default=False):
@@ -113,8 +115,7 @@ class SamplerTesterMixin(unittest.TestCase):
    @slow
    def test_sample(self):
-        generator = torch.Generator()
-        generator = generator.manual_seed(6694729458485568)
+        generator = torch.manual_seed(0)

        # 1. Load models
        scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church")
@@ -159,18 +160,17 @@ class SamplerTesterMixin(unittest.TestCase):
        assert image.shape == (1, 3, 256, 256)
        image_slice = image[0, -1, -3:, -3:].cpu()
-        assert (image_slice - torch.tensor([[-0.0598, -0.0611, -0.0506], [-0.0726, 0.0220, 0.0103], [-0.0723, -0.1310, -0.2458]])).abs().sum() < 1e-3
+        expected_slice = torch.tensor([-0.1636, -0.1765, -0.1968, -0.1338, -0.1432, -0.1622, -0.1793, -0.2001, -0.2280])
+        assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2

    def test_sample_fast(self):
        # 1. Load models
-        generator = torch.Generator()
-        generator = generator.manual_seed(6694729458485568)
+        generator = torch.manual_seed(0)
        scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church", timesteps=10)
        model = UNetModel.from_pretrained("fusing/ddpm-lsun-church").to(torch_device)

        # 2. Sample gaussian noise
-        torch.manual_seed(0)
        image = scheduler.sample_noise((1, model.in_channels, model.resolution, model.resolution), device=torch_device, generator=generator)

        # 3. Denoise
@@ -200,7 +200,8 @@ class SamplerTesterMixin(unittest.TestCase):
        assert image.shape == (1, 3, 256, 256)
        image_slice = image[0, -1, -3:, -3:].cpu()
-        assert (image_slice - torch.tensor([[0.1746, 0.5125, -0.7920], [-0.5734, -0.2910, -0.1984], [0.4090, -0.7740, -0.3941]])).abs().sum() < 1e-3
+        expected_slice = torch.tensor([-0.0304, -0.1895, -0.2436, -0.9837, -0.5422, 0.1931, -0.8175, 0.0862, -0.7783])
+        assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2
class PipelineTesterMixin(unittest.TestCase):
@@ -215,16 +216,14 @@ class PipelineTesterMixin(unittest.TestCase):
            ddpm.save_pretrained(tmpdirname)
            new_ddpm = DDPM.from_pretrained(tmpdirname)

-        generator = torch.Generator()
-        generator = generator.manual_seed(669472945848556)
+        generator = torch.manual_seed(0)
        image = ddpm(generator=generator)

-        generator = generator.manual_seed(669472945848556)
+        generator = generator.manual_seed(0)
        new_image = new_ddpm(generator=generator)

        assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass"

    @slow
    def test_from_pretrained_hub(self):
        model_path = "fusing/ddpm-cifar10"
@@ -235,12 +234,10 @@ class PipelineTesterMixin(unittest.TestCase):
        ddpm.noise_scheduler.num_timesteps = 10
        ddpm_from_hub.noise_scheduler.num_timesteps = 10

-        generator = torch.Generator(device=torch_device)
-        generator = generator.manual_seed(669472945848556)
+        generator = torch.manual_seed(0)
        image = ddpm(generator=generator)

-        generator = generator.manual_seed(669472945848556)
+        generator = generator.manual_seed(0)
        new_image = ddpm_from_hub(generator=generator)

        assert (image - new_image).abs().sum() < 1e-5, "Models don't give the same forward pass"