"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "c9693db2fcd6876bfc4b00dd9088808896fff94c"
Unverified commit fd3238b4 authored by Yih-Dar, committed by GitHub

Fix `MistralIntegrationTest` (#31231)



* fix

* fix

* fix

* fix

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 2965b204
@@ -526,7 +526,7 @@ class MistralIntegrationTest(unittest.TestCase):
         # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
         # considering differences in hardware processing and potential deviations in output.
         EXPECTED_SLICE = {
-            7: torch.tensor([-5.8781, -5.8616, -0.1052, -4.7200, -5.8781, -5.8774, -5.8773, -5.8777, -5.8781, -5.8780, -5.8781, -5.8779, -1.0787, 1.7583, -5.8779, -5.8780, -5.8783, -5.8778, -5.8776, -5.8781, -5.8784, -5.8778, -5.8778, -5.8777, -5.8779, -5.8778, -5.8776, -5.8780, -5.8779, -5.8781]),
+            7: torch.tensor([-5.8828, -5.8633, -0.1042, -4.7266, -5.8828, -5.8789, -5.8789, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -1.0801, 1.7598, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828]),
             8: torch.tensor([-5.8711, -5.8555, -0.1050, -4.7148, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -1.0781, 1.7568, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711]),
             9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]),
         }  # fmt: skip
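For context, a minimal sketch of the hardware-keyed expectation pattern this hunk updates, assuming a CUDA/ROCm device; the helper name and the truncated tensors are illustrative, not part of the diff:

import torch

# torch.cuda.get_device_capability() returns (major, minor); major 7/8/9
# roughly maps to V100-class, A100-class, and H100-class GPUs (MI300 under
# ROCm also resolves to 9 in this suite, per the comment in the diff).
def compute_capability_major() -> int:
    major, _minor = torch.cuda.get_device_capability()
    return major

# One reference slice per hardware generation (truncated here for brevity).
EXPECTED_SLICE = {
    8: torch.tensor([-5.8711, -5.8555, -0.1050, -4.7148]),
    9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188]),
}

def check_logits(out: torch.Tensor) -> None:
    # Loose atol/rtol absorb kernel-level numeric noise between runs.
    torch.testing.assert_close(
        out[0, 0, :4], EXPECTED_SLICE[compute_capability_major()], atol=1e-4, rtol=1e-4
    )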
@@ -535,15 +535,11 @@ class MistralIntegrationTest(unittest.TestCase):
             out[0, 0, :30], EXPECTED_SLICE[self.cuda_compute_capability_major_version], atol=1e-4, rtol=1e-4
         )

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_bitsandbytes
     def test_model_7b_generation(self):
         EXPECTED_TEXT_COMPLETION = {
-            7: "My favourite condiment is 100% ketchup. I love it on everything. I'm not a big",
+            7: "My favourite condiment is 100% ketchup. I’m not a fan of mustard, mayo,",
             8: "My favourite condiment is 100% ketchup. I’m not a fan of mustard, mayo,",
         }
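This hunk, like several below, drops the per-test del model / backend_empty_cache / gc.collect() triplet. The visible hunks do not show where that cleanup moved; a minimal sketch of centralizing it in tearDown, using torch.cuda.empty_cache() in place of the repo's device-agnostic backend_empty_cache helper:

import gc
import unittest

import torch

class CleanupSketch(unittest.TestCase):
    # Hypothetical centralized cleanup: tearDown runs after every test
    # method, so individual tests no longer need their own cleanup lines.
    def tearDown(self):
        gc.collect()
        if torch.cuda.is_available():
            # Release cached allocator blocks so the next test starts clean.
            torch.cuda.empty_cache()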
@@ -559,10 +555,6 @@ class MistralIntegrationTest(unittest.TestCase):
         text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], text)

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @require_bitsandbytes
     @slow
     @require_flash_attn
@@ -587,11 +579,6 @@ class MistralIntegrationTest(unittest.TestCase):
         generated_ids = model.generate(input_ids, max_new_tokens=4, temperature=0)
         self.assertEqual(EXPECTED_OUTPUT_TOKEN_IDS, generated_ids[0][-2:].tolist())

-        del assistant_model
-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_torch_sdpa
     def test_model_7b_long_prompt_sdpa(self):
@@ -635,7 +622,7 @@ class MistralIntegrationTest(unittest.TestCase):
         # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
         # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXT_COMPLETION = {
-            7: "My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs",
+            7: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
             8: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
             9: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
         }
@@ -654,10 +641,6 @@ class MistralIntegrationTest(unittest.TestCase):
         text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], text)

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_read_token
     def test_compile_static_cache(self):
@@ -726,10 +709,6 @@ class MistralIntegrationTest(unittest.TestCase):
         static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_compiled_text)

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_torch_gpu
     ...
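This last hunk touches test_compile_static_cache, which exercises generation with a static KV cache under torch.compile. A minimal sketch of that combination, with an assumed checkpoint; the real test's setup is not shown in the diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-v0.1"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
inputs = tokenizer("My favourite condiment is", return_tensors="pt").to(model.device)

# A static cache pre-allocates fixed-size KV buffers, giving torch.compile
# stable shapes to specialize on, so decoding avoids per-step recompilation.
model.forward = torch.compile(model.forward, mode="reduce-overhead")
generated_ids = model.generate(**inputs, max_new_tokens=20, cache_implementation="static")
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))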