Unverified Commit 27c7f971 authored by Fanli Lin's avatar Fanli Lin Committed by GitHub
Browse files

[tests] fix `static` cache implementation is not compatible with...

[tests] fix `static` cache implementation is not compatible with `attn_implementation==flash_attention_2` (#32039)

* add flash attention check

* fix

* fix
parent 5f841c74
......@@ -290,7 +290,7 @@ class CacheIntegrationTest(unittest.TestCase):
self.assertTrue(decoded[0].endswith(last_output))
@require_torch_gpu
@parameterized.expand(["eager", "sdpa", "flash_attention_2"])
@parameterized.expand(["eager", "sdpa"])
def test_static_cache_greedy_decoding_pad_left(self, attn_implementation):
EXPECTED_GENERATION = [
"The best color is the one that complements the skin tone of the",
......@@ -330,7 +330,7 @@ class CacheIntegrationTest(unittest.TestCase):
self.assertListEqual(decoded, EXPECTED_GENERATION)
@require_torch_gpu
@parameterized.expand(["eager", "sdpa", "flash_attention_2"])
@parameterized.expand(["eager", "sdpa"])
def test_static_cache_greedy_decoding_pad_right(self, attn_implementation):
EXPECTED_GENERATION = [
"The best color isЋ the one that complements the skin tone of",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment