Support for Offset Noise in examples (#2753)

* add noise offset
* make style

Support for Offset Noise in examples (#2753)
* add noise offset
* make style
e0d8c9ef · Haofan Wang · GitHub · 92e1164e · e0d8c9ef · e0d8c9ef
Unverified Commit e0d8c9ef authored Mar 23, 2023 by Haofan Wang Committed by GitHub Mar 23, 2023
Showing with 14 additions and 0 deletions

examples/text_to_image/train_text_to_image.py examples/text_to_image/train_text_to_image.py +7 -0

examples/text_to_image/train_text_to_image_lora.py examples/text_to_image/train_text_to_image_lora.py +7 -0

No files found.
--- a/examples/text_to_image/train_text_to_image.py
+++ b/examples/text_to_image/train_text_to_image.py
@@ -297,6 +297,7 @@ def parse_args():
    parser.add_argument(
        "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
    )
+    parser.add_argument("--noise_offset", type=float, default=0, help="The scale of noise offset.")
    args = parser.parse_args()
    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
@@ -705,6 +706,12 @@ def main():
                # Sample noise that we'll add to the latents
                noise = torch.randn_like(latents)
+                if args.noise_offset:
+                    # https://www.crosslabs.org/blog/diffusion-with-offset-noise
+                    noise += args.noise_offset * torch.randn(
+                        (latents.shape[0], latents.shape[1], 1, 1), device=latents.device
+                    )
                bsz = latents.shape[0]
                # Sample a random timestep for each image
                timesteps = torch.randint(0, noise_scheduler.num_train_timesteps, (bsz,), device=latents.device)

--- a/examples/text_to_image/train_text_to_image_lora.py
+++ b/examples/text_to_image/train_text_to_image_lora.py
@@ -333,6 +333,7 @@ def parse_args():
    parser.add_argument(
        "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
    )
+    parser.add_argument("--noise_offset", type=float, default=0, help="The scale of noise offset.")
    args = parser.parse_args()
    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
@@ -718,6 +719,12 @@ def main():
                # Sample noise that we'll add to the latents
                noise = torch.randn_like(latents)
+                if args.noise_offset:
+                    # https://www.crosslabs.org/blog/diffusion-with-offset-noise
+                    noise += args.noise_offset * torch.randn(
+                        (latents.shape[0], latents.shape[1], 1, 1), device=latents.device
+                    )
                bsz = latents.shape[0]
                # Sample a random timestep for each image
                timesteps = torch.randint(0, noise_scheduler.num_train_timesteps, (bsz,), device=latents.device)