Fix llama + gemma accelerate tests (#29380)

cec77334 · Marc Sun · GitHub · 15f8296a · cec77334 · cec77334
Unverified Commit cec77334 authored Mar 01, 2024 by Marc Sun Committed by GitHub Mar 01, 2024
Showing 2 changed files with 8 additions and 0 deletions

tests/models/gemma/test_modeling_gemma.py +4 -0

tests/models/llama/test_modeling_llama.py +4 -0

No files found.
--- a/tests/models/gemma/test_modeling_gemma.py
+++ b/tests/models/gemma/test_modeling_gemma.py
@@ -298,6 +298,10 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
    test_headmasking = False
    test_pruning = False
+    # Need to remove 0.9 in `test_cpu_offload`
+    # This is because we are hitting edge cases with the causal_mask buffer
+    model_split_percents = [0.5, 0.6]
    # TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79245/workflows/9490ef58-79c2-410d-8f51-e3495156cf9c/jobs/1012146
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name

--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -302,6 +302,10 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
    test_pruning = False
    fx_compatible = True
+    # Need to use `0.8` instead of `0.9` for `test_cpu_offload`
+    # This is because we are hitting edge cases with the causal_mask buffer
+    model_split_percents = [0.5, 0.7, 0.8]
    def setUp(self):
        self.model_tester = LlamaModelTester(self)
        self.config_tester = ConfigTester(self, config_class=LlamaConfig, hidden_size=37)