Unverified Commit 43d17c18 authored by Yih-Dar, committed by GitHub

Mark `test_eager_matches_sdpa_generate` flaky for some models (#29479)



* fix

* revert for qwen2

* revert for qwen2

* update

* update

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent ba56ed08
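
For readers unfamiliar with the decorator this PR applies: `is_flaky` reruns a failing test a bounded number of times and only reports a failure once the retries are exhausted. The real implementation lives in `transformers.testing_utils`; the snippet below is only a minimal sketch of the idea, not the library's code.

# Minimal sketch of a retry decorator in the spirit of `is_flaky`; the actual
# implementation in transformers.testing_utils may differ in detail.
import functools
from typing import Optional

def is_flaky(max_attempts: int = 5, description: Optional[str] = None):
    def decorator(test_func):
        @functools.wraps(test_func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return test_func(*args, **kwargs)
                except Exception as err:
                    if attempt == max_attempts:
                        raise  # out of retries: surface the real failure
                    print(f"Attempt {attempt}/{max_attempts} failed ({description}): {err}")
        return wrapper
    return decorator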
@@ -27,7 +27,14 @@ from transformers import (
     is_torch_available,
     set_seed,
 )
-from transformers.testing_utils import require_bitsandbytes, require_torch, require_torch_sdpa, slow, torch_device
+from transformers.testing_utils import (
+    is_flaky,
+    require_bitsandbytes,
+    require_torch,
+    require_torch_sdpa,
+    slow,
+    torch_device,
+)
 
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -503,6 +510,8 @@ class FalconModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
         torch.testing.assert_close(ntk_sin_long, original_sin_long)
         self.assertTrue((ntk_scaling_rope.inv_freq <= original_rope.inv_freq).all())
 
+    # TODO: @Fxmarty
+    @is_flaky(max_attempts=3, description="flaky on some models.")
     @require_torch_sdpa
     @slow
     def test_eager_matches_sdpa_generate(self):
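
For context, `test_eager_matches_sdpa_generate` checks that greedy generation produces the same tokens under the eager attention implementation and PyTorch's scaled_dot_product_attention backend. Below is a minimal sketch of that comparison; it is an illustration, not the repository's test, and the tiny checkpoint name is a hypothetical placeholder.

# Illustrative sketch: compare greedy generation across attention backends.
# "some-org/tiny-random-causal-lm" is a hypothetical placeholder checkpoint.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "some-org/tiny-random-causal-lm"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
inputs = tokenizer("Eager and SDPA should agree:", return_tensors="pt")

model_eager = AutoModelForCausalLM.from_pretrained(checkpoint, attn_implementation="eager")
model_sdpa = AutoModelForCausalLM.from_pretrained(checkpoint, attn_implementation="sdpa")

with torch.no_grad():
    out_eager = model_eager.generate(**inputs, max_new_tokens=20, do_sample=False)
    out_sdpa = model_sdpa.generate(**inputs, max_new_tokens=20, do_sample=False)

# Tiny numerical differences between the two kernels can occasionally flip a
# greedy token choice, which is why the test is retried rather than treated
# as a hard failure.
assert torch.equal(out_eager, out_sdpa)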
@@ -24,6 +24,7 @@ import pytest
 from transformers import AutoTokenizer, MistralConfig, is_torch_available, set_seed
 from transformers.testing_utils import (
     backend_empty_cache,
+    is_flaky,
     require_bitsandbytes,
     require_flash_attn,
     require_torch,
@@ -309,6 +310,13 @@ class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
         ):
             return True
 
+    # TODO: @Fxmarty
+    @is_flaky(max_attempts=3, description="flaky on some models.")
+    @require_torch_sdpa
+    @slow
+    def test_eager_matches_sdpa_generate(self):
+        super().test_eager_matches_sdpa_generate()
+
     def setUp(self):
         self.model_tester = MistralModelTester(self)
         self.config_tester = ConfigTester(self, config_class=MistralConfig, hidden_size=37)
@@ -22,9 +22,11 @@ import pytest
 
 from transformers import MixtralConfig, is_torch_available
 from transformers.testing_utils import (
+    is_flaky,
     require_flash_attn,
     require_torch,
     require_torch_gpu,
+    require_torch_sdpa,
     slow,
     torch_device,
 )
@@ -307,6 +309,13 @@ class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
         ):
             return True
 
+    # TODO: @Fxmarty
+    @is_flaky(max_attempts=3, description="flaky on some models.")
+    @require_torch_sdpa
+    @slow
+    def test_eager_matches_sdpa_generate(self):
+        super().test_eager_matches_sdpa_generate()
+
     def setUp(self):
         self.model_tester = MixtralModelTester(self)
         self.config_tester = ConfigTester(self, config_class=MixtralConfig, hidden_size=37)
@@ -320,6 +320,14 @@ class Qwen2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
         ):
             return True
 
+    # Ignore copy
+    # TODO: @Fxmarty
+    @require_torch_sdpa
+    @slow
+    @unittest.skip(reason="Currently failing.")
+    def test_eager_matches_sdpa_generate(self):
+        super().test_eager_matches_sdpa_generate()
+
     def setUp(self):
         self.model_tester = Qwen2ModelTester(self)
         self.config_tester = ConfigTester(self, config_class=Qwen2Config, hidden_size=37)
@@ -349,6 +349,12 @@ class Qwen2MoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
         ):
             return True
 
+    # Ignore copy
+    @require_torch_sdpa
+    @slow
+    def test_eager_matches_sdpa_generate(self):
+        super().test_eager_matches_sdpa_generate()
+
     def setUp(self):
         self.model_tester = Qwen2MoeModelTester(self)
         self.config_tester = ConfigTester(self, config_class=Qwen2MoeConfig, hidden_size=37)
@@ -21,6 +21,7 @@ from parameterized import parameterized
 
 from transformers import StableLmConfig, is_torch_available, set_seed
 from transformers.testing_utils import (
+    is_flaky,
     require_bitsandbytes,
     require_flash_attn,
     require_torch,
@@ -500,6 +501,8 @@ class StableLmModelIntegrationTest(unittest.TestCase):
         self.assertEqual(EXPECTED_OUTPUT_TOKEN_IDS, generated_ids[0][-3:].tolist())
 
     # Copied from transformers.tests.models.llama.test_modeling_llama.LlamaModelTest.test_eager_matches_sdpa_generate with Llama->StableLm,saibo/llama-1B->stabilityai/stablelm-3b-4e1t
+    # TODO: @Fxmarty
+    @is_flaky(max_attempts=3, description="flaky on some models.")
     @require_torch_sdpa
     @slow
     def test_eager_matches_sdpa_generate(self):
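
Note that every variant of this test is also behind `@slow`, which in transformers skips the test unless the RUN_SLOW environment variable is set. Assuming the repository's usual tests/models/<name>/test_modeling_<name>.py layout (the file paths are not shown in this diff), one of these tests can be exercised locally with something like:

RUN_SLOW=1 python -m pytest tests/models/falcon/test_modeling_falcon.py -k test_eager_matches_sdpa_generate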