[CI] Fix Fast GPU tests on PR (#10912)

* update
* update
* update
* update
* update

---------

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

[CI] Fix Fast GPU tests on PR (#10912)
* update
* update
* update
* update
* update

---------

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
e5c43b8a · Dhruv Nair · GitHub · 9a8e8db7 · e5c43b8a · e5c43b8a
Unverified Commit e5c43b8a authored Feb 27, 2025 by Dhruv Nair Committed by GitHub Feb 27, 2025
4 changed files
--- a/.github/workflows/pr_tests_gpu.yml
+++ b/.github/workflows/pr_tests_gpu.yml
@@ -11,6 +11,8 @@ on:
      - "src/diffusers/loaders/lora_base.py"
      - "src/diffusers/loaders/lora_pipeline.py"
      - "src/diffusers/loaders/peft.py"
+      - "tests/pipelines/test_pipelines_common.py"
+      - "tests/models/test_modeling_common.py"
  workflow_dispatch:

 concurrency:

--- a/tests/models/test_modeling_common.py
+++ b/tests/models/test_modeling_common.py
@@ -1169,17 +1169,16 @@ class ModelTesterMixin:
        base_output = model(**inputs_dict)

        model_size = compute_module_sizes(model)[""]
+        max_size = int(self.model_split_percents[0] * model_size)
+        # Force disk offload by setting very small CPU memory
+        max_memory = {0: max_size, "cpu": int(0.1 * max_size)}
+
        with tempfile.TemporaryDirectory() as tmp_dir:
            model.cpu().save_pretrained(tmp_dir, safe_serialization=False)
-
            with self.assertRaises(ValueError):
-                max_size = int(self.model_split_percents[0] * model_size)
-                max_memory = {0: max_size, "cpu": max_size}
                # This errors out because it's missing an offload folder
                new_model = self.model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)

-            max_size = int(self.model_split_percents[0] * model_size)
-            max_memory = {0: max_size, "cpu": max_size}
            new_model = self.model_class.from_pretrained(
                tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=tmp_dir
            )

--- a/tests/models/transformers/test_models_transformer_omnigen.py
+++ b/tests/models/transformers/test_models_transformer_omnigen.py
@@ -30,6 +30,7 @@ class OmniGenTransformerTests(ModelTesterMixin, unittest.TestCase):
    model_class = OmniGenTransformer2DModel
    main_input_name = "hidden_states"
    uses_custom_attn_processor = True
+    model_split_percents = [0.1, 0.1, 0.1]

    @property
    def dummy_input(self):
@@ -73,9 +74,9 @@ class OmniGenTransformerTests(ModelTesterMixin, unittest.TestCase):
            "num_attention_heads": 4,
            "num_key_value_heads": 4,
            "intermediate_size": 32,
-            "num_layers": 1,
+            "num_layers": 20,
            "pad_token_id": 0,
-            "vocab_size": 100,
+            "vocab_size": 1000,
            "in_channels": 4,
            "time_step_dim": 4,
            "rope_scaling": {"long_factor": list(range(1, 3)), "short_factor": list(range(1, 3))},

--- a/tests/models/transformers/test_models_transformer_sd3.py
+++ b/tests/models/transformers/test_models_transformer_sd3.py
@@ -33,6 +33,7 @@ enable_full_determinism()
 class SD3TransformerTests(ModelTesterMixin, unittest.TestCase):
    model_class = SD3Transformer2DModel
    main_input_name = "hidden_states"
+    model_split_percents = [0.8, 0.8, 0.9]

    @property
    def dummy_input(self):
@@ -67,7 +68,7 @@ class SD3TransformerTests(ModelTesterMixin, unittest.TestCase):
            "sample_size": 32,
            "patch_size": 1,
            "in_channels": 4,
-            "num_layers": 1,
+            "num_layers": 4,
            "attention_head_dim": 8,
            "num_attention_heads": 4,
            "caption_projection_dim": 32,
@@ -107,6 +108,7 @@ class SD3TransformerTests(ModelTesterMixin, unittest.TestCase):
 class SD35TransformerTests(ModelTesterMixin, unittest.TestCase):
    model_class = SD3Transformer2DModel
    main_input_name = "hidden_states"
+    model_split_percents = [0.8, 0.8, 0.9]

    @property
    def dummy_input(self):
@@ -141,7 +143,7 @@ class SD35TransformerTests(ModelTesterMixin, unittest.TestCase):
            "sample_size": 32,
            "patch_size": 1,
            "in_channels": 4,
-            "num_layers": 2,
+            "num_layers": 4,
            "attention_head_dim": 8,
            "num_attention_heads": 4,
            "caption_projection_dim": 32,