Actually fix the grad ckpt test (#734)

* use_deterministic_algorithms for grad ckpt test * remove eval * Apply suggestions from code review * Update tests/test_models_unet.py

Actually fix the grad ckpt test (#734)
* use_deterministic_algorithms for grad ckpt test * remove eval * Apply suggestions from code review * Update tests/test_models_unet.py
f3128c87 · Suraj Patil · GitHub · 08839682 · f3128c87
Unverified Commit f3128c87 authored Oct 06, 2022 by Suraj Patil Committed by GitHub Oct 06, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 1 deletion

tests/test_models_unet.py tests/test_models_unet.py +14 -1

No files found.
--- a/tests/test_models_unet.py
+++ b/tests/test_models_unet.py
@@ -15,6 +15,7 @@

 import gc
 import math
+import os
 import tracemalloc
 import unittest

@@ -268,8 +269,16 @@ class UNet2DConditionModelTests(ModelTesterMixin, unittest.TestCase):
        return init_dict, inputs_dict

    def test_gradient_checkpointing(self):
+        # enable deterministic behavior for gradient checkpointing
+        torch.use_deterministic_algorithms(True)
+
+        # from torch docs: "A handful of CUDA operations are nondeterministic if the CUDA version is 10.2 or greater,
+        # unless the environment variable CUBLAS_WORKSPACE_CONFIG=:4096:8 or CUBLAS_WORKSPACE_CONFIG=:16:8 is set."
+        # https://pytorch.org/docs/stable/generated/torch.use_deterministic_algorithms.html
+        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
+
        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict).eval()
+        model = self.model_class(**init_dict)
        model.to(torch_device)

        out = model(**inputs_dict).sample
@@ -304,6 +313,10 @@ class UNet2DConditionModelTests(ModelTesterMixin, unittest.TestCase):
        for name in grad_checkpointed:
            self.assertTrue(torch.allclose(grad_checkpointed[name], grad_not_checkpointed[name], atol=5e-5))

+        # disable deterministic behavior for gradient checkpointing
+        del os.environ["CUBLAS_WORKSPACE_CONFIG"]
+        torch.use_deterministic_algorithms(False)
+

 #    TODO(Patrick) - Re-add this test after having cleaned up LDM
 #    def test_output_pretrained_spatial_transformer(self):