Unverified Commit a185e1ab authored by Sayak Paul's avatar Sayak Paul Committed by GitHub
Browse files

[tests] add a test on torch compile for varied resolutions (#11776)



* add test for checking compile on different shapes.

* update

* update

* Apply suggestions from code review
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
parent d93381cd
......@@ -150,6 +150,28 @@ pipeline(prompt, num_inference_steps=30).images[0]
Compilation is slow the first time, but once compiled, it is significantly faster. Try to only use the compiled pipeline on the same type of inference operations. Calling the compiled pipeline on a different image size retriggers compilation which is slow and inefficient.
### Dynamic shape compilation
> [!TIP]
> Make sure to always use the nightly version of PyTorch for better support.
`torch.compile` keeps track of input shapes and conditions, and if these are different, it recompiles the model. For example, if a model is compiled on a 1024x1024 resolution image and used on an image with a different resolution, it triggers recompilation.
To avoid recompilation, pass `dynamic=True` so the compiler tries to generate a more dynamic kernel that can be reused when input conditions change.
```diff
+ torch.fx.experimental._config.use_duck_shape = False
+ pipeline.unet = torch.compile(
pipeline.unet, fullgraph=True, dynamic=True
)
```
Setting `use_duck_shape=False` tells the compiler not to reuse the same symbolic variable for input sizes that merely happen to be equal. For more details, check out this [comment](https://github.com/huggingface/diffusers/pull/11327#discussion_r2047659790).
Not all models may benefit from dynamic compilation out of the box and may require changes. Refer to this [PR](https://github.com/huggingface/diffusers/pull/11297/) that improved the [`AuraFlowPipeline`] implementation to benefit from dynamic compilation.
Feel free to open an issue if dynamic compilation doesn't work as expected for a Diffusers model.
### Regional compilation
......
......@@ -76,6 +76,7 @@ from diffusers.utils.testing_utils import (
require_torch_accelerator_with_training,
require_torch_gpu,
require_torch_multi_accelerator,
require_torch_version_greater,
run_test_in_subprocess,
slow,
torch_all_close,
......@@ -1907,6 +1908,8 @@ class ModelPushToHubTester(unittest.TestCase):
@is_torch_compile
@slow
class TorchCompileTesterMixin:
different_shapes_for_compilation = None
def setUp(self):
# clean up the VRAM before each test
super().setUp()
......@@ -1957,14 +1960,14 @@ class TorchCompileTesterMixin:
_ = model(**inputs_dict)
def test_compile_with_group_offloading(self):
if not self.model_class._supports_group_offloading:
pytest.skip("Model does not support group offloading.")
torch._dynamo.config.cache_size_limit = 10000
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
model = self.model_class(**init_dict)
if not getattr(model, "_supports_group_offloading", True):
return
model.eval()
# TODO: Can test for other group offloading kwargs later if needed.
group_offload_kwargs = {
......@@ -1981,6 +1984,21 @@ class TorchCompileTesterMixin:
_ = model(**inputs_dict)
_ = model(**inputs_dict)
@require_torch_version_greater("2.7.1")
def test_compile_on_different_shapes(self):
    """Check that one dynamic compilation serves every configured resolution.

    The test is skipped when the concrete test class leaves
    `different_shapes_for_compilation` as `None`. Otherwise the model is compiled once with
    `fullgraph=True, dynamic=True` and called on inputs built for each `(height, width)` pair.
    `error_on_recompile=True` turns any recompilation into an error.
    """
    if self.different_shapes_for_compilation is None:
        pytest.skip(f"Skipping as `different_shapes_for_compilation` is not set for {self.__class__.__name__}.")

    init_dict, _ = self.prepare_init_args_and_inputs_for_common()
    model = self.model_class(**init_dict).to(torch_device)
    # Put the model in eval mode, as the group-offloading compile test does.
    model.eval()
    model = torch.compile(model, fullgraph=True, dynamic=True)

    # `use_duck_shape` is a process-wide flag. Patch it only for the duration of this test so
    # the setting does not leak into tests that run afterwards.
    with torch.fx.experimental._config.patch(use_duck_shape=False):
        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
            for height, width in self.different_shapes_for_compilation:
                inputs_dict = self.prepare_dummy_input(height=height, width=width)
                _ = model(**inputs_dict)
@slow
@require_torch_2
......
......@@ -91,10 +91,20 @@ class FluxTransformerTests(ModelTesterMixin, unittest.TestCase):
@property
def dummy_input(self):
    """Return the inputs produced by `prepare_dummy_input` called with its default arguments."""
    inputs = self.prepare_dummy_input()
    return inputs
@property
def input_shape(self):
    """Return the fixed input shape tuple used by this tester."""
    shape = (16, 4)
    return shape
@property
def output_shape(self):
    """Return the fixed output shape tuple used by this tester."""
    shape = (16, 4)
    return shape
def prepare_dummy_input(self, height=4, width=4):
batch_size = 1
num_latent_channels = 4
num_image_channels = 3
height = width = 4
sequence_length = 48
embedding_dim = 32
......@@ -114,14 +124,6 @@ class FluxTransformerTests(ModelTesterMixin, unittest.TestCase):
"timestep": timestep,
}
@property
def input_shape(self):
    """Return the fixed input shape tuple used by this tester."""
    shape = (16, 4)
    return shape
@property
def output_shape(self):
    """Return the fixed output shape tuple used by this tester."""
    shape = (16, 4)
    return shape
def prepare_init_args_and_inputs_for_common(self):
init_dict = {
"patch_size": 1,
......@@ -173,10 +175,14 @@ class FluxTransformerTests(ModelTesterMixin, unittest.TestCase):
class FluxTransformerCompileTests(TorchCompileTesterMixin, unittest.TestCase):
    """Run the `TorchCompileTesterMixin` checks against `FluxTransformer2DModel`.

    Init arguments and dummy inputs are borrowed from `FluxTransformerTests`.
    """

    # Model class the compile tests instantiate.
    model_class = FluxTransformer2DModel
    # (height, width) pairs forwarded to `prepare_dummy_input`.
    different_shapes_for_compilation = [(4, 4), (4, 8), (8, 8)]

    def prepare_init_args_and_inputs_for_common(self):
        """Delegate to a fresh `FluxTransformerTests` instance."""
        base_tester = FluxTransformerTests()
        return base_tester.prepare_init_args_and_inputs_for_common()

    def prepare_dummy_input(self, height, width):
        """Build dummy inputs for the given `height` and `width` via `FluxTransformerTests`."""
        base_tester = FluxTransformerTests()
        return base_tester.prepare_dummy_input(height=height, width=width)
class FluxTransformerLoRAHotSwapTests(LoraHotSwappingForModelTesterMixin, unittest.TestCase):
model_class = FluxTransformer2DModel
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment