SNR gamma fixes for v_prediction training (#5106)

Co-authored-by: bghira <bghira@users.github.com>

SNR gamma fixes for v_prediction training (#5106)
Co-authored-by: bghira <bghira@users.github.com>
24563ca6 · Bagheera · GitHub · 914586f5 · 24563ca6 · 24563ca6
Unverified commit 24563ca6, authored Sep 20, 2023 by Bagheera; committed by GitHub on Sep 20, 2023
5 changed files
--- a/examples/controlnet/train_controlnet_flax.py
+++ b/examples/controlnet/train_controlnet_flax.py
@@ -908,6 +908,9 @@ def main():
            if args.snr_gamma is not None:
                snr = jnp.array(compute_snr(timesteps))
                snr_loss_weights = jnp.where(snr < args.snr_gamma, snr, jnp.ones_like(snr) * args.snr_gamma) / snr
+                if noise_scheduler.config.prediction_type == "v_prediction":
+                    # v-prediction (velocity objective): add 1 to every SNR weight so none falls below 1 (an offset, not a clamp).
+                    snr_loss_weights = snr_loss_weights + 1
                loss = loss * snr_loss_weights
            loss = loss.mean()

--- a/examples/research_projects/onnxruntime/text_to_image/train_text_to_image.py
+++ b/examples/research_projects/onnxruntime/text_to_image/train_text_to_image.py
@@ -875,6 +875,9 @@ def main():
                    mse_loss_weights = (
                        torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
                    )
+                    if noise_scheduler.config.prediction_type == "v_prediction":
+                        # v-prediction (velocity objective): add 1 to every SNR weight so none falls below 1 (an offset, not a clamp).
+                        mse_loss_weights = mse_loss_weights + 1
                    # We first calculate the original loss. Then we mean over the non-batch dimensions and
                    # rebalance the sample-wise losses with their respective loss weights.
                    # Finally, we take the mean of the rebalanced loss.

--- a/examples/text_to_image/train_text_to_image.py
+++ b/examples/text_to_image/train_text_to_image.py
@@ -955,6 +955,9 @@ def main():
                    mse_loss_weights = (
                        torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
                    )
+                    if noise_scheduler.config.prediction_type == "v_prediction":
+                        # v-prediction (velocity objective): add 1 to every SNR weight so none falls below 1 (an offset, not a clamp).
+                        mse_loss_weights = mse_loss_weights + 1
                    # We first calculate the original loss. Then we mean over the non-batch dimensions and
                    # rebalance the sample-wise losses with their respective loss weights.
                    # Finally, we take the mean of the rebalanced loss.

--- a/examples/text_to_image/train_text_to_image_lora.py
+++ b/examples/text_to_image/train_text_to_image_lora.py
@@ -786,6 +786,9 @@ def main():
                    mse_loss_weights = (
                        torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
                    )
+                    if noise_scheduler.config.prediction_type == "v_prediction":
+                        # v-prediction (velocity objective): add 1 to every SNR weight so none falls below 1 (an offset, not a clamp).
+                        mse_loss_weights = mse_loss_weights + 1
                    # We first calculate the original loss. Then we mean over the non-batch dimensions and
                    # rebalance the sample-wise losses with their respective loss weights.
                    # Finally, we take the mean of the rebalanced loss.

--- a/examples/text_to_image/train_text_to_image_lora_sdxl.py
+++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py
@@ -1075,6 +1075,9 @@ def main(args):
                    mse_loss_weights = (
                        torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
                    )
+                    if noise_scheduler.config.prediction_type == "v_prediction":
+                        # v-prediction (velocity objective): add 1 to every SNR weight so none falls below 1 (an offset, not a clamp).
+                        mse_loss_weights = mse_loss_weights + 1
                    # We first calculate the original loss. Then we mean over the non-batch dimensions and
                    # rebalance the sample-wise losses with their respective loss weights.
                    # Finally, we take the mean of the rebalanced loss.